diff options
Diffstat (limited to 'sys/cddl/contrib/opensolaris')
298 files changed, 25 insertions, 196927 deletions
diff --git a/sys/cddl/contrib/opensolaris/common/acl/acl_common.c b/sys/cddl/contrib/opensolaris/common/acl/acl_common.c deleted file mode 100644 index a681905579c6..000000000000 --- a/sys/cddl/contrib/opensolaris/common/acl/acl_common.c +++ /dev/null @@ -1,1765 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
- */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/avl.h> -#include <sys/misc.h> -#if defined(_KERNEL) -#include <sys/kmem.h> -#include <sys/systm.h> -#include <sys/sysmacros.h> -#include <acl/acl_common.h> -#include <sys/debug.h> -#else -#include <errno.h> -#include <stdlib.h> -#include <stddef.h> -#include <strings.h> -#include <unistd.h> -#include <assert.h> -#include <grp.h> -#include <pwd.h> -#include <acl_common.h> -#define ASSERT assert -#endif - -#define ACE_POSIX_SUPPORTED_BITS (ACE_READ_DATA | \ - ACE_WRITE_DATA | ACE_APPEND_DATA | ACE_EXECUTE | \ - ACE_READ_ATTRIBUTES | ACE_READ_ACL | ACE_WRITE_ACL) - - -#define ACL_SYNCHRONIZE_SET_DENY 0x0000001 -#define ACL_SYNCHRONIZE_SET_ALLOW 0x0000002 -#define ACL_SYNCHRONIZE_ERR_DENY 0x0000004 -#define ACL_SYNCHRONIZE_ERR_ALLOW 0x0000008 - -#define ACL_WRITE_OWNER_SET_DENY 0x0000010 -#define ACL_WRITE_OWNER_SET_ALLOW 0x0000020 -#define ACL_WRITE_OWNER_ERR_DENY 0x0000040 -#define ACL_WRITE_OWNER_ERR_ALLOW 0x0000080 - -#define ACL_DELETE_SET_DENY 0x0000100 -#define ACL_DELETE_SET_ALLOW 0x0000200 -#define ACL_DELETE_ERR_DENY 0x0000400 -#define ACL_DELETE_ERR_ALLOW 0x0000800 - -#define ACL_WRITE_ATTRS_OWNER_SET_DENY 0x0001000 -#define ACL_WRITE_ATTRS_OWNER_SET_ALLOW 0x0002000 -#define ACL_WRITE_ATTRS_OWNER_ERR_DENY 0x0004000 -#define ACL_WRITE_ATTRS_OWNER_ERR_ALLOW 0x0008000 - -#define ACL_WRITE_ATTRS_WRITER_SET_DENY 0x0010000 -#define ACL_WRITE_ATTRS_WRITER_SET_ALLOW 0x0020000 -#define ACL_WRITE_ATTRS_WRITER_ERR_DENY 0x0040000 -#define ACL_WRITE_ATTRS_WRITER_ERR_ALLOW 0x0080000 - -#define ACL_WRITE_NAMED_WRITER_SET_DENY 0x0100000 -#define ACL_WRITE_NAMED_WRITER_SET_ALLOW 0x0200000 -#define ACL_WRITE_NAMED_WRITER_ERR_DENY 0x0400000 -#define ACL_WRITE_NAMED_WRITER_ERR_ALLOW 0x0800000 - -#define ACL_READ_NAMED_READER_SET_DENY 0x1000000 -#define ACL_READ_NAMED_READER_SET_ALLOW 0x2000000 -#define ACL_READ_NAMED_READER_ERR_DENY 0x4000000 -#define ACL_READ_NAMED_READER_ERR_ALLOW 0x8000000 - - -#define 
ACE_VALID_MASK_BITS (\ - ACE_READ_DATA | \ - ACE_LIST_DIRECTORY | \ - ACE_WRITE_DATA | \ - ACE_ADD_FILE | \ - ACE_APPEND_DATA | \ - ACE_ADD_SUBDIRECTORY | \ - ACE_READ_NAMED_ATTRS | \ - ACE_WRITE_NAMED_ATTRS | \ - ACE_EXECUTE | \ - ACE_DELETE_CHILD | \ - ACE_READ_ATTRIBUTES | \ - ACE_WRITE_ATTRIBUTES | \ - ACE_DELETE | \ - ACE_READ_ACL | \ - ACE_WRITE_ACL | \ - ACE_WRITE_OWNER | \ - ACE_SYNCHRONIZE) - -#define ACE_MASK_UNDEFINED 0x80000000 - -#define ACE_VALID_FLAG_BITS (ACE_FILE_INHERIT_ACE | \ - ACE_DIRECTORY_INHERIT_ACE | \ - ACE_NO_PROPAGATE_INHERIT_ACE | ACE_INHERIT_ONLY_ACE | \ - ACE_SUCCESSFUL_ACCESS_ACE_FLAG | ACE_FAILED_ACCESS_ACE_FLAG | \ - ACE_IDENTIFIER_GROUP | ACE_OWNER | ACE_GROUP | ACE_EVERYONE) - -/* - * ACL conversion helpers - */ - -typedef enum { - ace_unused, - ace_user_obj, - ace_user, - ace_group, /* includes GROUP and GROUP_OBJ */ - ace_other_obj -} ace_to_aent_state_t; - -typedef struct acevals { - uid_t key; - avl_node_t avl; - uint32_t mask; - uint32_t allowed; - uint32_t denied; - int aent_type; -} acevals_t; - -typedef struct ace_list { - acevals_t user_obj; - avl_tree_t user; - int numusers; - acevals_t group_obj; - avl_tree_t group; - int numgroups; - acevals_t other_obj; - uint32_t acl_mask; - int hasmask; - int dfacl_flag; - ace_to_aent_state_t state; - int seen; /* bitmask of all aclent_t a_type values seen */ -} ace_list_t; - -/* - * Generic shellsort, from K&R (1st ed, p 58.), somewhat modified. 
- * v = Ptr to array/vector of objs - * n = # objs in the array - * s = size of each obj (must be multiples of a word size) - * f = ptr to function to compare two objs - * returns (-1 = less than, 0 = equal, 1 = greater than - */ -void -ksort(caddr_t v, int n, int s, int (*f)()) -{ - int g, i, j, ii; - unsigned int *p1, *p2; - unsigned int tmp; - - /* No work to do */ - if (v == NULL || n <= 1) - return; - - /* Sanity check on arguments */ - ASSERT(((uintptr_t)v & 0x3) == 0 && (s & 0x3) == 0); - ASSERT(s > 0); - for (g = n / 2; g > 0; g /= 2) { - for (i = g; i < n; i++) { - for (j = i - g; j >= 0 && - (*f)(v + j * s, v + (j + g) * s) == 1; - j -= g) { - p1 = (void *)(v + j * s); - p2 = (void *)(v + (j + g) * s); - for (ii = 0; ii < s / 4; ii++) { - tmp = *p1; - *p1++ = *p2; - *p2++ = tmp; - } - } - } - } -} - -/* - * Compare two acls, all fields. Returns: - * -1 (less than) - * 0 (equal) - * +1 (greater than) - */ -int -cmp2acls(void *a, void *b) -{ - aclent_t *x = (aclent_t *)a; - aclent_t *y = (aclent_t *)b; - - /* Compare types */ - if (x->a_type < y->a_type) - return (-1); - if (x->a_type > y->a_type) - return (1); - /* Equal types; compare id's */ - if (x->a_id < y->a_id) - return (-1); - if (x->a_id > y->a_id) - return (1); - /* Equal ids; compare perms */ - if (x->a_perm < y->a_perm) - return (-1); - if (x->a_perm > y->a_perm) - return (1); - /* Totally equal */ - return (0); -} - -/*ARGSUSED*/ -static void * -cacl_realloc(void *ptr, size_t size, size_t new_size) -{ -#if defined(_KERNEL) - void *tmp; - - tmp = kmem_alloc(new_size, KM_SLEEP); - (void) memcpy(tmp, ptr, (size < new_size) ? 
size : new_size); - kmem_free(ptr, size); - return (tmp); -#else - return (realloc(ptr, new_size)); -#endif -} - -static int -cacl_malloc(void **ptr, size_t size) -{ -#if defined(_KERNEL) - *ptr = kmem_zalloc(size, KM_SLEEP); - return (0); -#else - *ptr = calloc(1, size); - if (*ptr == NULL) - return (errno); - - return (0); -#endif -} - -/*ARGSUSED*/ -static void -cacl_free(void *ptr, size_t size) -{ -#if defined(_KERNEL) - kmem_free(ptr, size); -#else - free(ptr); -#endif -} - -#if !defined(_KERNEL) -acl_t * -acl_alloc(enum acl_type type) -{ - acl_t *aclp; - - if (cacl_malloc((void **)&aclp, sizeof (acl_t)) != 0) - return (NULL); - - aclp->acl_aclp = NULL; - aclp->acl_cnt = 0; - - switch (type) { - case ACE_T: - aclp->acl_type = ACE_T; - aclp->acl_entry_size = sizeof (ace_t); - break; - case ACLENT_T: - aclp->acl_type = ACLENT_T; - aclp->acl_entry_size = sizeof (aclent_t); - break; - default: - acl_free(aclp); - aclp = NULL; - } - return (aclp); -} - -/* - * Free acl_t structure - */ -void -acl_free(acl_t *aclp) -{ - int acl_size; - - if (aclp == NULL) - return; - - if (aclp->acl_aclp) { - acl_size = aclp->acl_cnt * aclp->acl_entry_size; - cacl_free(aclp->acl_aclp, acl_size); - } - - cacl_free(aclp, sizeof (acl_t)); -} - -static uint32_t -access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow) -{ - uint32_t access_mask = 0; - int acl_produce; - int synchronize_set = 0, write_owner_set = 0; - int delete_set = 0, write_attrs_set = 0; - int read_named_set = 0, write_named_set = 0; - - acl_produce = (ACL_SYNCHRONIZE_SET_ALLOW | - ACL_WRITE_ATTRS_OWNER_SET_ALLOW | - ACL_WRITE_ATTRS_WRITER_SET_DENY); - - if (isallow) { - synchronize_set = ACL_SYNCHRONIZE_SET_ALLOW; - write_owner_set = ACL_WRITE_OWNER_SET_ALLOW; - delete_set = ACL_DELETE_SET_ALLOW; - if (hasreadperm) - read_named_set = ACL_READ_NAMED_READER_SET_ALLOW; - if (haswriteperm) - write_named_set = ACL_WRITE_NAMED_WRITER_SET_ALLOW; - if (isowner) - write_attrs_set = 
ACL_WRITE_ATTRS_OWNER_SET_ALLOW; - else if (haswriteperm) - write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; - } else { - - synchronize_set = ACL_SYNCHRONIZE_SET_DENY; - write_owner_set = ACL_WRITE_OWNER_SET_DENY; - delete_set = ACL_DELETE_SET_DENY; - if (hasreadperm) - read_named_set = ACL_READ_NAMED_READER_SET_DENY; - if (haswriteperm) - write_named_set = ACL_WRITE_NAMED_WRITER_SET_DENY; - if (isowner) - write_attrs_set = ACL_WRITE_ATTRS_OWNER_SET_DENY; - else if (haswriteperm) - write_attrs_set = ACL_WRITE_ATTRS_WRITER_SET_DENY; - else - /* - * If the entity is not the owner and does not - * have write permissions ACE_WRITE_ATTRIBUTES will - * always go in the DENY ACE. - */ - access_mask |= ACE_WRITE_ATTRIBUTES; - } - - if (acl_produce & synchronize_set) - access_mask |= ACE_SYNCHRONIZE; - if (acl_produce & write_owner_set) - access_mask |= ACE_WRITE_OWNER; - if (acl_produce & delete_set) - access_mask |= ACE_DELETE; - if (acl_produce & write_attrs_set) - access_mask |= ACE_WRITE_ATTRIBUTES; - if (acl_produce & read_named_set) - access_mask |= ACE_READ_NAMED_ATTRS; - if (acl_produce & write_named_set) - access_mask |= ACE_WRITE_NAMED_ATTRS; - - return (access_mask); -} - -/* - * Given an mode_t, convert it into an access_mask as used - * by nfsace, assuming aclent_t -> nfsace semantics. 
- */ -static uint32_t -mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow) -{ - uint32_t access = 0; - int haswriteperm = 0; - int hasreadperm = 0; - - if (isallow) { - haswriteperm = (mode & S_IWOTH); - hasreadperm = (mode & S_IROTH); - } else { - haswriteperm = !(mode & S_IWOTH); - hasreadperm = !(mode & S_IROTH); - } - - /* - * The following call takes care of correctly setting the following - * mask bits in the access_mask: - * ACE_SYNCHRONIZE, ACE_WRITE_OWNER, ACE_DELETE, - * ACE_WRITE_ATTRIBUTES, ACE_WRITE_NAMED_ATTRS, ACE_READ_NAMED_ATTRS - */ - access = access_mask_set(haswriteperm, hasreadperm, isowner, isallow); - - if (isallow) { - access |= ACE_READ_ACL | ACE_READ_ATTRIBUTES; - if (isowner) - access |= ACE_WRITE_ACL; - } else { - if (! isowner) - access |= ACE_WRITE_ACL; - } - - /* read */ - if (mode & S_IROTH) { - access |= ACE_READ_DATA; - } - /* write */ - if (mode & S_IWOTH) { - access |= ACE_WRITE_DATA | - ACE_APPEND_DATA; - if (isdir) - access |= ACE_DELETE_CHILD; - } - /* exec */ - if (mode & S_IXOTH) { - access |= ACE_EXECUTE; - } - - return (access); -} - -/* - * Given an nfsace (presumably an ALLOW entry), make a - * corresponding DENY entry at the address given. - */ -static void -ace_make_deny(ace_t *allow, ace_t *deny, int isdir, int isowner) -{ - (void) memcpy(deny, allow, sizeof (ace_t)); - - deny->a_who = allow->a_who; - - deny->a_type = ACE_ACCESS_DENIED_ACE_TYPE; - deny->a_access_mask ^= ACE_POSIX_SUPPORTED_BITS; - if (isdir) - deny->a_access_mask ^= ACE_DELETE_CHILD; - - deny->a_access_mask &= ~(ACE_SYNCHRONIZE | ACE_WRITE_OWNER | - ACE_DELETE | ACE_WRITE_ATTRIBUTES | ACE_READ_NAMED_ATTRS | - ACE_WRITE_NAMED_ATTRS); - deny->a_access_mask |= access_mask_set((allow->a_access_mask & - ACE_WRITE_DATA), (allow->a_access_mask & ACE_READ_DATA), isowner, - B_FALSE); -} -/* - * Make an initial pass over an array of aclent_t's. 
Gather - * information such as an ACL_MASK (if any), number of users, - * number of groups, and whether the array needs to be sorted. - */ -static int -ln_aent_preprocess(aclent_t *aclent, int n, - int *hasmask, mode_t *mask, - int *numuser, int *numgroup, int *needsort) -{ - int error = 0; - int i; - int curtype = 0; - - *hasmask = 0; - *mask = 07; - *needsort = 0; - *numuser = 0; - *numgroup = 0; - - for (i = 0; i < n; i++) { - if (aclent[i].a_type < curtype) - *needsort = 1; - else if (aclent[i].a_type > curtype) - curtype = aclent[i].a_type; - if (aclent[i].a_type & USER) - (*numuser)++; - if (aclent[i].a_type & (GROUP | GROUP_OBJ)) - (*numgroup)++; - if (aclent[i].a_type & CLASS_OBJ) { - if (*hasmask) { - error = EINVAL; - goto out; - } else { - *hasmask = 1; - *mask = aclent[i].a_perm; - } - } - } - - if ((! *hasmask) && (*numuser + *numgroup > 1)) { - error = EINVAL; - goto out; - } - -out: - return (error); -} - -/* - * Convert an array of aclent_t into an array of nfsace entries, - * following POSIX draft -> nfsv4 conversion semantics as outlined in - * the IETF draft. - */ -static int -ln_aent_to_ace(aclent_t *aclent, int n, ace_t **acepp, int *rescount, int isdir) -{ - int error = 0; - mode_t mask; - int numuser, numgroup, needsort; - int resultsize = 0; - int i, groupi = 0, skip; - ace_t *acep, *result = NULL; - int hasmask; - - error = ln_aent_preprocess(aclent, n, &hasmask, &mask, - &numuser, &numgroup, &needsort); - if (error != 0) - goto out; - - /* allow + deny for each aclent */ - resultsize = n * 2; - if (hasmask) { - /* - * stick extra deny on the group_obj and on each - * user|group for the mask (the group_obj was added - * into the count for numgroup) - */ - resultsize += numuser + numgroup; - /* ... 
and don't count the mask itself */ - resultsize -= 2; - } - - /* sort the source if necessary */ - if (needsort) - ksort((caddr_t)aclent, n, sizeof (aclent_t), cmp2acls); - - if (cacl_malloc((void **)&result, resultsize * sizeof (ace_t)) != 0) - goto out; - - acep = result; - - for (i = 0; i < n; i++) { - /* - * don't process CLASS_OBJ (mask); mask was grabbed in - * ln_aent_preprocess() - */ - if (aclent[i].a_type & CLASS_OBJ) - continue; - - /* If we need an ACL_MASK emulator, prepend it now */ - if ((hasmask) && - (aclent[i].a_type & (USER | GROUP | GROUP_OBJ))) { - acep->a_type = ACE_ACCESS_DENIED_ACE_TYPE; - acep->a_flags = 0; - if (aclent[i].a_type & GROUP_OBJ) { - acep->a_who = (uid_t)-1; - acep->a_flags |= - (ACE_IDENTIFIER_GROUP|ACE_GROUP); - } else if (aclent[i].a_type & USER) { - acep->a_who = aclent[i].a_id; - } else { - acep->a_who = aclent[i].a_id; - acep->a_flags |= ACE_IDENTIFIER_GROUP; - } - if (aclent[i].a_type & ACL_DEFAULT) { - acep->a_flags |= ACE_INHERIT_ONLY_ACE | - ACE_FILE_INHERIT_ACE | - ACE_DIRECTORY_INHERIT_ACE; - } - /* - * Set the access mask for the prepended deny - * ace. To do this, we invert the mask (found - * in ln_aent_preprocess()) then convert it to an - * DENY ace access_mask. - */ - acep->a_access_mask = mode_to_ace_access((mask ^ 07), - isdir, 0, 0); - acep += 1; - } - - /* handle a_perm -> access_mask */ - acep->a_access_mask = mode_to_ace_access(aclent[i].a_perm, - isdir, aclent[i].a_type & USER_OBJ, 1); - - /* emulate a default aclent */ - if (aclent[i].a_type & ACL_DEFAULT) { - acep->a_flags |= ACE_INHERIT_ONLY_ACE | - ACE_FILE_INHERIT_ACE | - ACE_DIRECTORY_INHERIT_ACE; - } - - /* - * handle a_perm and a_id - * - * this must be done last, since it involves the - * corresponding deny aces, which are handled - * differently for each different a_type. 
- */ - if (aclent[i].a_type & USER_OBJ) { - acep->a_who = (uid_t)-1; - acep->a_flags |= ACE_OWNER; - ace_make_deny(acep, acep + 1, isdir, B_TRUE); - acep += 2; - } else if (aclent[i].a_type & USER) { - acep->a_who = aclent[i].a_id; - ace_make_deny(acep, acep + 1, isdir, B_FALSE); - acep += 2; - } else if (aclent[i].a_type & (GROUP_OBJ | GROUP)) { - if (aclent[i].a_type & GROUP_OBJ) { - acep->a_who = (uid_t)-1; - acep->a_flags |= ACE_GROUP; - } else { - acep->a_who = aclent[i].a_id; - } - acep->a_flags |= ACE_IDENTIFIER_GROUP; - /* - * Set the corresponding deny for the group ace. - * - * The deny aces go after all of the groups, unlike - * everything else, where they immediately follow - * the allow ace. - * - * We calculate "skip", the number of slots to - * skip ahead for the deny ace, here. - * - * The pattern is: - * MD1 A1 MD2 A2 MD3 A3 D1 D2 D3 - * thus, skip is - * (2 * numgroup) - 1 - groupi - * (2 * numgroup) to account for MD + A - * - 1 to account for the fact that we're on the - * access (A), not the mask (MD) - * - groupi to account for the fact that we have - * passed up groupi number of MD's. - */ - skip = (2 * numgroup) - 1 - groupi; - ace_make_deny(acep, acep + skip, isdir, B_FALSE); - /* - * If we just did the last group, skip acep past - * all of the denies; else, just move ahead one. 
- */ - if (++groupi >= numgroup) - acep += numgroup + 1; - else - acep += 1; - } else if (aclent[i].a_type & OTHER_OBJ) { - acep->a_who = (uid_t)-1; - acep->a_flags |= ACE_EVERYONE; - ace_make_deny(acep, acep + 1, isdir, B_FALSE); - acep += 2; - } else { - error = EINVAL; - goto out; - } - } - - *acepp = result; - *rescount = resultsize; - -out: - if (error != 0) { - if ((result != NULL) && (resultsize > 0)) { - cacl_free(result, resultsize * sizeof (ace_t)); - } - } - - return (error); -} - -static int -convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir, - ace_t **retacep, int *retacecnt) -{ - ace_t *acep; - ace_t *dfacep; - int acecnt = 0; - int dfacecnt = 0; - int dfaclstart = 0; - int dfaclcnt = 0; - aclent_t *aclp; - int i; - int error; - int acesz, dfacesz; - - ksort((caddr_t)aclentp, aclcnt, sizeof (aclent_t), cmp2acls); - - for (i = 0, aclp = aclentp; i < aclcnt; aclp++, i++) { - if (aclp->a_type & ACL_DEFAULT) - break; - } - - if (i < aclcnt) { - dfaclstart = i; - dfaclcnt = aclcnt - i; - } - - if (dfaclcnt && !isdir) { - return (EINVAL); - } - - error = ln_aent_to_ace(aclentp, i, &acep, &acecnt, isdir); - if (error) - return (error); - - if (dfaclcnt) { - error = ln_aent_to_ace(&aclentp[dfaclstart], dfaclcnt, - &dfacep, &dfacecnt, isdir); - if (error) { - if (acep) { - cacl_free(acep, acecnt * sizeof (ace_t)); - } - return (error); - } - } - - if (dfacecnt != 0) { - acesz = sizeof (ace_t) * acecnt; - dfacesz = sizeof (ace_t) * dfacecnt; - acep = cacl_realloc(acep, acesz, acesz + dfacesz); - if (acep == NULL) - return (ENOMEM); - if (dfaclcnt) { - (void) memcpy(acep + acecnt, dfacep, dfacesz); - } - } - if (dfaclcnt) - cacl_free(dfacep, dfacecnt * sizeof (ace_t)); - - *retacecnt = acecnt + dfacecnt; - *retacep = acep; - return (0); -} - -static int -ace_mask_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) -{ - int error = 0; - o_mode_t mode = 0; - uint32_t bits, wantbits; - - /* read */ - if (mask & ACE_READ_DATA) - mode |= 
S_IROTH; - - /* write */ - wantbits = (ACE_WRITE_DATA | ACE_APPEND_DATA); - if (isdir) - wantbits |= ACE_DELETE_CHILD; - bits = mask & wantbits; - if (bits != 0) { - if (bits != wantbits) { - error = ENOTSUP; - goto out; - } - mode |= S_IWOTH; - } - - /* exec */ - if (mask & ACE_EXECUTE) { - mode |= S_IXOTH; - } - - *modep = mode; - -out: - return (error); -} - -static void -acevals_init(acevals_t *vals, uid_t key) -{ - bzero(vals, sizeof (*vals)); - vals->allowed = ACE_MASK_UNDEFINED; - vals->denied = ACE_MASK_UNDEFINED; - vals->mask = ACE_MASK_UNDEFINED; - vals->key = key; -} - -static void -ace_list_init(ace_list_t *al, int dfacl_flag) -{ - acevals_init(&al->user_obj, 0); - acevals_init(&al->group_obj, 0); - acevals_init(&al->other_obj, 0); - al->numusers = 0; - al->numgroups = 0; - al->acl_mask = 0; - al->hasmask = 0; - al->state = ace_unused; - al->seen = 0; - al->dfacl_flag = dfacl_flag; -} - -/* - * Find or create an acevals holder for a given id and avl tree. - * - * Note that only one thread will ever touch these avl trees, so - * there is no need for locking. - */ -static acevals_t * -acevals_find(ace_t *ace, avl_tree_t *avl, int *num) -{ - acevals_t key, *rc; - avl_index_t where; - - key.key = ace->a_who; - rc = avl_find(avl, &key, &where); - if (rc != NULL) - return (rc); - - /* this memory is freed by ln_ace_to_aent()->ace_list_free() */ - if (cacl_malloc((void **)&rc, sizeof (acevals_t)) != 0) - return (NULL); - - acevals_init(rc, ace->a_who); - avl_insert(avl, rc, where); - (*num)++; - - return (rc); -} - -static int -access_mask_check(ace_t *acep, int mask_bit, int isowner) -{ - int set_deny, err_deny; - int set_allow, err_allow; - int acl_consume; - int haswriteperm, hasreadperm; - - if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { - haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 0 : 1; - hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 0 : 1; - } else { - haswriteperm = (acep->a_access_mask & ACE_WRITE_DATA) ? 
1 : 0; - hasreadperm = (acep->a_access_mask & ACE_READ_DATA) ? 1 : 0; - } - - acl_consume = (ACL_SYNCHRONIZE_ERR_DENY | - ACL_DELETE_ERR_DENY | - ACL_WRITE_OWNER_ERR_DENY | - ACL_WRITE_OWNER_ERR_ALLOW | - ACL_WRITE_ATTRS_OWNER_SET_ALLOW | - ACL_WRITE_ATTRS_OWNER_ERR_DENY | - ACL_WRITE_ATTRS_WRITER_SET_DENY | - ACL_WRITE_ATTRS_WRITER_ERR_ALLOW | - ACL_WRITE_NAMED_WRITER_ERR_DENY | - ACL_READ_NAMED_READER_ERR_DENY); - - if (mask_bit == ACE_SYNCHRONIZE) { - set_deny = ACL_SYNCHRONIZE_SET_DENY; - err_deny = ACL_SYNCHRONIZE_ERR_DENY; - set_allow = ACL_SYNCHRONIZE_SET_ALLOW; - err_allow = ACL_SYNCHRONIZE_ERR_ALLOW; - } else if (mask_bit == ACE_WRITE_OWNER) { - set_deny = ACL_WRITE_OWNER_SET_DENY; - err_deny = ACL_WRITE_OWNER_ERR_DENY; - set_allow = ACL_WRITE_OWNER_SET_ALLOW; - err_allow = ACL_WRITE_OWNER_ERR_ALLOW; - } else if (mask_bit == ACE_DELETE) { - set_deny = ACL_DELETE_SET_DENY; - err_deny = ACL_DELETE_ERR_DENY; - set_allow = ACL_DELETE_SET_ALLOW; - err_allow = ACL_DELETE_ERR_ALLOW; - } else if (mask_bit == ACE_WRITE_ATTRIBUTES) { - if (isowner) { - set_deny = ACL_WRITE_ATTRS_OWNER_SET_DENY; - err_deny = ACL_WRITE_ATTRS_OWNER_ERR_DENY; - set_allow = ACL_WRITE_ATTRS_OWNER_SET_ALLOW; - err_allow = ACL_WRITE_ATTRS_OWNER_ERR_ALLOW; - } else if (haswriteperm) { - set_deny = ACL_WRITE_ATTRS_WRITER_SET_DENY; - err_deny = ACL_WRITE_ATTRS_WRITER_ERR_DENY; - set_allow = ACL_WRITE_ATTRS_WRITER_SET_ALLOW; - err_allow = ACL_WRITE_ATTRS_WRITER_ERR_ALLOW; - } else { - if ((acep->a_access_mask & mask_bit) && - (acep->a_type & ACE_ACCESS_ALLOWED_ACE_TYPE)) { - return (ENOTSUP); - } - return (0); - } - } else if (mask_bit == ACE_READ_NAMED_ATTRS) { - if (!hasreadperm) - return (0); - - set_deny = ACL_READ_NAMED_READER_SET_DENY; - err_deny = ACL_READ_NAMED_READER_ERR_DENY; - set_allow = ACL_READ_NAMED_READER_SET_ALLOW; - err_allow = ACL_READ_NAMED_READER_ERR_ALLOW; - } else if (mask_bit == ACE_WRITE_NAMED_ATTRS) { - if (!haswriteperm) - return (0); - - set_deny = 
ACL_WRITE_NAMED_WRITER_SET_DENY; - err_deny = ACL_WRITE_NAMED_WRITER_ERR_DENY; - set_allow = ACL_WRITE_NAMED_WRITER_SET_ALLOW; - err_allow = ACL_WRITE_NAMED_WRITER_ERR_ALLOW; - } else { - return (EINVAL); - } - - if (acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) { - if (acl_consume & set_deny) { - if (!(acep->a_access_mask & mask_bit)) { - return (ENOTSUP); - } - } else if (acl_consume & err_deny) { - if (acep->a_access_mask & mask_bit) { - return (ENOTSUP); - } - } - } else { - /* ACE_ACCESS_ALLOWED_ACE_TYPE */ - if (acl_consume & set_allow) { - if (!(acep->a_access_mask & mask_bit)) { - return (ENOTSUP); - } - } else if (acl_consume & err_allow) { - if (acep->a_access_mask & mask_bit) { - return (ENOTSUP); - } - } - } - return (0); -} - -static int -ace_to_aent_legal(ace_t *acep) -{ - int error = 0; - int isowner; - - /* only ALLOW or DENY */ - if ((acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE) && - (acep->a_type != ACE_ACCESS_DENIED_ACE_TYPE)) { - error = ENOTSUP; - goto out; - } - - /* check for invalid flags */ - if (acep->a_flags & ~(ACE_VALID_FLAG_BITS)) { - error = EINVAL; - goto out; - } - - /* some flags are illegal */ - if (acep->a_flags & (ACE_SUCCESSFUL_ACCESS_ACE_FLAG | - ACE_FAILED_ACCESS_ACE_FLAG | - ACE_NO_PROPAGATE_INHERIT_ACE)) { - error = ENOTSUP; - goto out; - } - - /* check for invalid masks */ - if (acep->a_access_mask & ~(ACE_VALID_MASK_BITS)) { - error = EINVAL; - goto out; - } - - if ((acep->a_flags & ACE_OWNER)) { - isowner = 1; - } else { - isowner = 0; - } - - error = access_mask_check(acep, ACE_SYNCHRONIZE, isowner); - if (error) - goto out; - - error = access_mask_check(acep, ACE_WRITE_OWNER, isowner); - if (error) - goto out; - - error = access_mask_check(acep, ACE_DELETE, isowner); - if (error) - goto out; - - error = access_mask_check(acep, ACE_WRITE_ATTRIBUTES, isowner); - if (error) - goto out; - - error = access_mask_check(acep, ACE_READ_NAMED_ATTRS, isowner); - if (error) - goto out; - - error = access_mask_check(acep, 
ACE_WRITE_NAMED_ATTRS, isowner); - if (error) - goto out; - - /* more detailed checking of masks */ - if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { - if (! (acep->a_access_mask & ACE_READ_ATTRIBUTES)) { - error = ENOTSUP; - goto out; - } - if ((acep->a_access_mask & ACE_WRITE_DATA) && - (! (acep->a_access_mask & ACE_APPEND_DATA))) { - error = ENOTSUP; - goto out; - } - if ((! (acep->a_access_mask & ACE_WRITE_DATA)) && - (acep->a_access_mask & ACE_APPEND_DATA)) { - error = ENOTSUP; - goto out; - } - } - - /* ACL enforcement */ - if ((acep->a_access_mask & ACE_READ_ACL) && - (acep->a_type != ACE_ACCESS_ALLOWED_ACE_TYPE)) { - error = ENOTSUP; - goto out; - } - if (acep->a_access_mask & ACE_WRITE_ACL) { - if ((acep->a_type == ACE_ACCESS_DENIED_ACE_TYPE) && - (isowner)) { - error = ENOTSUP; - goto out; - } - if ((acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) && - (! isowner)) { - error = ENOTSUP; - goto out; - } - } - -out: - return (error); -} - -static int -ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir) -{ - /* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */ - if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) != - (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) { - return (ENOTSUP); - } - - return (ace_mask_to_mode(mask, modep, isdir)); -} - -static int -acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list, - uid_t owner, gid_t group, boolean_t isdir) -{ - int error; - uint32_t flips = ACE_POSIX_SUPPORTED_BITS; - - if (isdir) - flips |= ACE_DELETE_CHILD; - if (vals->allowed != (vals->denied ^ flips)) { - error = ENOTSUP; - goto out; - } - if ((list->hasmask) && (list->acl_mask != vals->mask) && - (vals->aent_type & (USER | GROUP | GROUP_OBJ))) { - error = ENOTSUP; - goto out; - } - error = ace_allow_to_mode(vals->allowed, &dest->a_perm, isdir); - if (error != 0) - goto out; - dest->a_type = vals->aent_type; - if (dest->a_type & (USER | GROUP)) { - dest->a_id = vals->key; - } else if (dest->a_type & USER_OBJ) { - dest->a_id = owner; 
- } else if (dest->a_type & GROUP_OBJ) { - dest->a_id = group; - } else if (dest->a_type & OTHER_OBJ) { - dest->a_id = 0; - } else { - error = EINVAL; - goto out; - } - -out: - return (error); -} - - -static int -ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt, - uid_t owner, gid_t group, boolean_t isdir) -{ - int error = 0; - aclent_t *aent, *result = NULL; - acevals_t *vals; - int resultcount; - - if ((list->seen & (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) != - (USER_OBJ | GROUP_OBJ | OTHER_OBJ)) { - error = ENOTSUP; - goto out; - } - if ((! list->hasmask) && (list->numusers + list->numgroups > 0)) { - error = ENOTSUP; - goto out; - } - - resultcount = 3 + list->numusers + list->numgroups; - /* - * This must be the same condition as below, when we add the CLASS_OBJ - * (aka ACL mask) - */ - if ((list->hasmask) || (! list->dfacl_flag)) - resultcount += 1; - - if (cacl_malloc((void **)&result, - resultcount * sizeof (aclent_t)) != 0) { - error = ENOMEM; - goto out; - } - aent = result; - - /* USER_OBJ */ - if (!(list->user_obj.aent_type & USER_OBJ)) { - error = EINVAL; - goto out; - } - - error = acevals_to_aent(&list->user_obj, aent, list, owner, group, - isdir); - - if (error != 0) - goto out; - ++aent; - /* USER */ - vals = NULL; - for (vals = avl_first(&list->user); vals != NULL; - vals = AVL_NEXT(&list->user, vals)) { - if (!(vals->aent_type & USER)) { - error = EINVAL; - goto out; - } - error = acevals_to_aent(vals, aent, list, owner, group, - isdir); - if (error != 0) - goto out; - ++aent; - } - /* GROUP_OBJ */ - if (!(list->group_obj.aent_type & GROUP_OBJ)) { - error = EINVAL; - goto out; - } - error = acevals_to_aent(&list->group_obj, aent, list, owner, group, - isdir); - if (error != 0) - goto out; - ++aent; - /* GROUP */ - vals = NULL; - for (vals = avl_first(&list->group); vals != NULL; - vals = AVL_NEXT(&list->group, vals)) { - if (!(vals->aent_type & GROUP)) { - error = EINVAL; - goto out; - } - error = acevals_to_aent(vals, aent, list, 
owner, group, - isdir); - if (error != 0) - goto out; - ++aent; - } - /* - * CLASS_OBJ (aka ACL_MASK) - * - * An ACL_MASK is not fabricated if the ACL is a default ACL. - * This is to follow UFS's behavior. - */ - if ((list->hasmask) || (! list->dfacl_flag)) { - if (list->hasmask) { - uint32_t flips = ACE_POSIX_SUPPORTED_BITS; - if (isdir) - flips |= ACE_DELETE_CHILD; - error = ace_mask_to_mode(list->acl_mask ^ flips, - &aent->a_perm, isdir); - if (error != 0) - goto out; - } else { - /* fabricate the ACL_MASK from the group permissions */ - error = ace_mask_to_mode(list->group_obj.allowed, - &aent->a_perm, isdir); - if (error != 0) - goto out; - } - aent->a_id = 0; - aent->a_type = CLASS_OBJ | list->dfacl_flag; - ++aent; - } - /* OTHER_OBJ */ - if (!(list->other_obj.aent_type & OTHER_OBJ)) { - error = EINVAL; - goto out; - } - error = acevals_to_aent(&list->other_obj, aent, list, owner, group, - isdir); - if (error != 0) - goto out; - ++aent; - - *aclentp = result; - *aclcnt = resultcount; - -out: - if (error != 0) { - if (result != NULL) - cacl_free(result, resultcount * sizeof (aclent_t)); - } - - return (error); -} - - -/* - * free all data associated with an ace_list - */ -static void -ace_list_free(ace_list_t *al) -{ - acevals_t *node; - void *cookie; - - if (al == NULL) - return; - - cookie = NULL; - while ((node = avl_destroy_nodes(&al->user, &cookie)) != NULL) - cacl_free(node, sizeof (acevals_t)); - cookie = NULL; - while ((node = avl_destroy_nodes(&al->group, &cookie)) != NULL) - cacl_free(node, sizeof (acevals_t)); - - avl_destroy(&al->user); - avl_destroy(&al->group); - - /* free the container itself */ - cacl_free(al, sizeof (ace_list_t)); -} - -static int -acevals_compare(const void *va, const void *vb) -{ - const acevals_t *a = va, *b = vb; - - if (a->key == b->key) - return (0); - - if (a->key > b->key) - return (1); - - else - return (-1); -} - -/* - * Convert a list of ace_t entries to equivalent regular and default - * aclent_t lists. 
Return error (ENOTSUP) when conversion is not possible. - */ -static int -ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group, - aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt, - boolean_t isdir) -{ - int error = 0; - ace_t *acep; - uint32_t bits; - int i; - ace_list_t *normacl = NULL, *dfacl = NULL, *acl; - acevals_t *vals; - - *aclentp = NULL; - *aclcnt = 0; - *dfaclentp = NULL; - *dfaclcnt = 0; - - /* we need at least user_obj, group_obj, and other_obj */ - if (n < 6) { - error = ENOTSUP; - goto out; - } - if (ace == NULL) { - error = EINVAL; - goto out; - } - - error = cacl_malloc((void **)&normacl, sizeof (ace_list_t)); - if (error != 0) - goto out; - - avl_create(&normacl->user, acevals_compare, sizeof (acevals_t), - offsetof(acevals_t, avl)); - avl_create(&normacl->group, acevals_compare, sizeof (acevals_t), - offsetof(acevals_t, avl)); - - ace_list_init(normacl, 0); - - error = cacl_malloc((void **)&dfacl, sizeof (ace_list_t)); - if (error != 0) - goto out; - - avl_create(&dfacl->user, acevals_compare, sizeof (acevals_t), - offsetof(acevals_t, avl)); - avl_create(&dfacl->group, acevals_compare, sizeof (acevals_t), - offsetof(acevals_t, avl)); - ace_list_init(dfacl, ACL_DEFAULT); - - /* process every ace_t... */ - for (i = 0; i < n; i++) { - acep = &ace[i]; - - /* rule out certain cases quickly */ - error = ace_to_aent_legal(acep); - if (error != 0) - goto out; - - /* - * Turn off these bits in order to not have to worry about - * them when doing the checks for compliments. 
- */ - acep->a_access_mask &= ~(ACE_WRITE_OWNER | ACE_DELETE | - ACE_SYNCHRONIZE | ACE_WRITE_ATTRIBUTES | - ACE_READ_NAMED_ATTRS | ACE_WRITE_NAMED_ATTRS); - - /* see if this should be a regular or default acl */ - bits = acep->a_flags & - (ACE_INHERIT_ONLY_ACE | - ACE_FILE_INHERIT_ACE | - ACE_DIRECTORY_INHERIT_ACE); - if (bits != 0) { - /* all or nothing on these inherit bits */ - if (bits != (ACE_INHERIT_ONLY_ACE | - ACE_FILE_INHERIT_ACE | - ACE_DIRECTORY_INHERIT_ACE)) { - error = ENOTSUP; - goto out; - } - acl = dfacl; - } else { - acl = normacl; - } - - if ((acep->a_flags & ACE_OWNER)) { - if (acl->state > ace_user_obj) { - error = ENOTSUP; - goto out; - } - acl->state = ace_user_obj; - acl->seen |= USER_OBJ; - vals = &acl->user_obj; - vals->aent_type = USER_OBJ | acl->dfacl_flag; - } else if ((acep->a_flags & ACE_EVERYONE)) { - acl->state = ace_other_obj; - acl->seen |= OTHER_OBJ; - vals = &acl->other_obj; - vals->aent_type = OTHER_OBJ | acl->dfacl_flag; - } else if (acep->a_flags & ACE_IDENTIFIER_GROUP) { - if (acl->state > ace_group) { - error = ENOTSUP; - goto out; - } - if ((acep->a_flags & ACE_GROUP)) { - acl->seen |= GROUP_OBJ; - vals = &acl->group_obj; - vals->aent_type = GROUP_OBJ | acl->dfacl_flag; - } else { - acl->seen |= GROUP; - vals = acevals_find(acep, &acl->group, - &acl->numgroups); - if (vals == NULL) { - error = ENOMEM; - goto out; - } - vals->aent_type = GROUP | acl->dfacl_flag; - } - acl->state = ace_group; - } else { - if (acl->state > ace_user) { - error = ENOTSUP; - goto out; - } - acl->state = ace_user; - acl->seen |= USER; - vals = acevals_find(acep, &acl->user, - &acl->numusers); - if (vals == NULL) { - error = ENOMEM; - goto out; - } - vals->aent_type = USER | acl->dfacl_flag; - } - - if (!(acl->state > ace_unused)) { - error = EINVAL; - goto out; - } - - if (acep->a_type == ACE_ACCESS_ALLOWED_ACE_TYPE) { - /* no more than one allowed per aclent_t */ - if (vals->allowed != ACE_MASK_UNDEFINED) { - error = ENOTSUP; - goto out; - } - 
vals->allowed = acep->a_access_mask; - } else { - /* - * it's a DENY; if there was a previous DENY, it - * must have been an ACL_MASK. - */ - if (vals->denied != ACE_MASK_UNDEFINED) { - /* ACL_MASK is for USER and GROUP only */ - if ((acl->state != ace_user) && - (acl->state != ace_group)) { - error = ENOTSUP; - goto out; - } - - if (! acl->hasmask) { - acl->hasmask = 1; - acl->acl_mask = vals->denied; - /* check for mismatched ACL_MASK emulations */ - } else if (acl->acl_mask != vals->denied) { - error = ENOTSUP; - goto out; - } - vals->mask = vals->denied; - } - vals->denied = acep->a_access_mask; - } - } - - /* done collating; produce the aclent_t lists */ - if (normacl->state != ace_unused) { - error = ace_list_to_aent(normacl, aclentp, aclcnt, - owner, group, isdir); - if (error != 0) { - goto out; - } - } - if (dfacl->state != ace_unused) { - error = ace_list_to_aent(dfacl, dfaclentp, dfaclcnt, - owner, group, isdir); - if (error != 0) { - goto out; - } - } - -out: - if (normacl != NULL) - ace_list_free(normacl); - if (dfacl != NULL) - ace_list_free(dfacl); - - return (error); -} - -static int -convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir, - uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt) -{ - int error = 0; - aclent_t *aclentp, *dfaclentp; - int aclcnt, dfaclcnt; - int aclsz, dfaclsz; - - error = ln_ace_to_aent(acebufp, acecnt, owner, group, - &aclentp, &aclcnt, &dfaclentp, &dfaclcnt, isdir); - - if (error) - return (error); - - - if (dfaclcnt != 0) { - /* - * Slap aclentp and dfaclentp into a single array. 
- */ - aclsz = sizeof (aclent_t) * aclcnt; - dfaclsz = sizeof (aclent_t) * dfaclcnt; - aclentp = cacl_realloc(aclentp, aclsz, aclsz + dfaclsz); - if (aclentp != NULL) { - (void) memcpy(aclentp + aclcnt, dfaclentp, dfaclsz); - } else { - error = ENOMEM; - } - } - - if (aclentp) { - *retaclentp = aclentp; - *retaclcnt = aclcnt + dfaclcnt; - } - - if (dfaclentp) - cacl_free(dfaclentp, dfaclsz); - - return (error); -} - - -int -acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, uid_t owner, - gid_t group) -{ - int aclcnt; - void *acldata; - int error; - - /* - * See if we need to translate - */ - if ((target_flavor == _ACL_ACE_ENABLED && aclp->acl_type == ACE_T) || - (target_flavor == _ACL_ACLENT_ENABLED && - aclp->acl_type == ACLENT_T)) - return (0); - - if (target_flavor == -1) { - error = EINVAL; - goto out; - } - - if (target_flavor == _ACL_ACE_ENABLED && - aclp->acl_type == ACLENT_T) { - error = convert_aent_to_ace(aclp->acl_aclp, - aclp->acl_cnt, isdir, (ace_t **)&acldata, &aclcnt); - if (error) - goto out; - - } else if (target_flavor == _ACL_ACLENT_ENABLED && - aclp->acl_type == ACE_T) { - error = convert_ace_to_aent(aclp->acl_aclp, aclp->acl_cnt, - isdir, owner, group, (aclent_t **)&acldata, &aclcnt); - if (error) - goto out; - } else { - error = ENOTSUP; - goto out; - } - - /* - * replace old acl with newly translated acl - */ - cacl_free(aclp->acl_aclp, aclp->acl_cnt * aclp->acl_entry_size); - aclp->acl_aclp = acldata; - aclp->acl_cnt = aclcnt; - if (target_flavor == _ACL_ACE_ENABLED) { - aclp->acl_type = ACE_T; - aclp->acl_entry_size = sizeof (ace_t); - } else { - aclp->acl_type = ACLENT_T; - aclp->acl_entry_size = sizeof (aclent_t); - } - return (0); - -out: - -#if !defined(_KERNEL) - errno = error; - return (-1); -#else - return (error); -#endif -} -#endif /* !_KERNEL */ - -#define SET_ACE(acl, index, who, mask, type, flags) { \ - acl[0][index].a_who = (uint32_t)who; \ - acl[0][index].a_type = type; \ - acl[0][index].a_flags = flags; \ - 
acl[0][index++].a_access_mask = mask; \ -} - -void -acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks) -{ - uint32_t read_mask = ACE_READ_DATA; - uint32_t write_mask = ACE_WRITE_DATA|ACE_APPEND_DATA; - uint32_t execute_mask = ACE_EXECUTE; - - (void) isdir; /* will need this later */ - - masks->deny1 = 0; - if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) - masks->deny1 |= read_mask; - if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) - masks->deny1 |= write_mask; - if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) - masks->deny1 |= execute_mask; - - masks->deny2 = 0; - if (!(mode & S_IRGRP) && (mode & S_IROTH)) - masks->deny2 |= read_mask; - if (!(mode & S_IWGRP) && (mode & S_IWOTH)) - masks->deny2 |= write_mask; - if (!(mode & S_IXGRP) && (mode & S_IXOTH)) - masks->deny2 |= execute_mask; - - masks->allow0 = 0; - if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) - masks->allow0 |= read_mask; - if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) - masks->allow0 |= write_mask; - if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) - masks->allow0 |= execute_mask; - - masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| - ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| - ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; - if (mode & S_IRUSR) - masks->owner |= read_mask; - if (mode & S_IWUSR) - masks->owner |= write_mask; - if (mode & S_IXUSR) - masks->owner |= execute_mask; - - masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| - ACE_SYNCHRONIZE; - if (mode & S_IRGRP) - masks->group |= read_mask; - if (mode & S_IWGRP) - masks->group |= write_mask; - if (mode & S_IXGRP) - masks->group |= execute_mask; - - masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS| - ACE_SYNCHRONIZE; - if (mode & S_IROTH) - masks->everyone |= read_mask; - if (mode & S_IWOTH) - masks->everyone |= write_mask; - if (mode & S_IXOTH) - masks->everyone |= execute_mask; -} - 
-int -acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count) -{ - int index = 0; - int error; - trivial_acl_t masks; - - *count = 3; - acl_trivial_access_masks(mode, isdir, &masks); - - if (masks.allow0) - (*count)++; - if (masks.deny1) - (*count)++; - if (masks.deny2) - (*count)++; - - if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0) - return (error); - - if (masks.allow0) { - SET_ACE(acl, index, -1, masks.allow0, - ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER); - } - if (masks.deny1) { - SET_ACE(acl, index, -1, masks.deny1, - ACE_ACCESS_DENIED_ACE_TYPE, ACE_OWNER); - } - if (masks.deny2) { - SET_ACE(acl, index, -1, masks.deny2, - ACE_ACCESS_DENIED_ACE_TYPE, ACE_GROUP|ACE_IDENTIFIER_GROUP); - } - - SET_ACE(acl, index, -1, masks.owner, ACE_ACCESS_ALLOWED_ACE_TYPE, - ACE_OWNER); - SET_ACE(acl, index, -1, masks.group, ACE_ACCESS_ALLOWED_ACE_TYPE, - ACE_IDENTIFIER_GROUP|ACE_GROUP); - SET_ACE(acl, index, -1, masks.everyone, ACE_ACCESS_ALLOWED_ACE_TYPE, - ACE_EVERYONE); - - return (0); -} - -/* - * ace_trivial: - * determine whether an ace_t acl is trivial - * - * Trivialness implies that the acl is composed of only - * owner, group, everyone entries. ACL can't - * have read_acl denied, and write_owner/write_acl/write_attributes - * can only be owner@ entry. 
- */ -int -ace_trivial_common(void *acep, int aclcnt, - uint64_t (*walk)(void *, uint64_t, int aclcnt, - uint16_t *, uint16_t *, uint32_t *)) -{ - uint16_t flags; - uint32_t mask; - uint16_t type; - uint64_t cookie = 0; - - while (cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask)) { - switch (flags & ACE_TYPE_FLAGS) { - case ACE_OWNER: - case ACE_GROUP|ACE_IDENTIFIER_GROUP: - case ACE_EVERYONE: - break; - default: - return (1); - - } - - if (flags & (ACE_FILE_INHERIT_ACE| - ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| - ACE_INHERIT_ONLY_ACE)) - return (1); - - /* - * Special check for some special bits - * - * Don't allow anybody to deny reading basic - * attributes or a files ACL. - */ - if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && - (type == ACE_ACCESS_DENIED_ACE_TYPE)) - return (1); - - /* - * Delete permissions are never set by default - */ - if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) - return (1); - /* - * only allow owner@ to have - * write_acl/write_owner/write_attributes/write_xattr/ - */ - if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && - (!(flags & ACE_OWNER) && (mask & - (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| - ACE_WRITE_NAMED_ATTRS)))) - return (1); - - } - return (0); -} - -uint64_t -ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags, - uint16_t *type, uint32_t *mask) -{ - ace_t *acep = datap; - - if (cookie >= aclcnt) - return (0); - - *flags = acep[cookie].a_flags; - *type = acep[cookie].a_type; - *mask = acep[cookie++].a_access_mask; - - return (cookie); -} - -int -ace_trivial(ace_t *acep, int aclcnt) -{ - return (ace_trivial_common(acep, aclcnt, ace_walk)); -} diff --git a/sys/cddl/contrib/opensolaris/common/acl/acl_common.h b/sys/cddl/contrib/opensolaris/common/acl/acl_common.h deleted file mode 100644 index acf1f5da89d6..000000000000 --- a/sys/cddl/contrib/opensolaris/common/acl/acl_common.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to 
the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - */ - -#ifndef _ACL_COMMON_H -#define _ACL_COMMON_H - -#include <sys/types.h> -#include <sys/acl.h> -#include <sys/stat.h> - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct trivial_acl { - uint32_t allow0; /* allow mask for bits only in owner */ - uint32_t deny1; /* deny mask for bits not in owner */ - uint32_t deny2; /* deny mask for bits not in group */ - uint32_t owner; /* allow mask matching mode */ - uint32_t group; /* allow mask matching mode */ - uint32_t everyone; /* allow mask matching mode */ -} trivial_acl_t; - -extern int acltrivial(const char *); -extern void adjust_ace_pair(ace_t *pair, mode_t mode); -extern void adjust_ace_pair_common(void *, size_t, size_t, mode_t); -extern int ace_trivial(ace_t *acep, int aclcnt); -extern int ace_trivial_common(void *, int, - uint64_t (*walk)(void *, uint64_t, int aclcnt, uint16_t *, uint16_t *, - uint32_t *mask)); -#if !defined(_KERNEL) -extern acl_t *acl_alloc(acl_type_t); -extern void acl_free(acl_t *aclp); -extern int acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, - uid_t owner, gid_t 
group); -#endif /* !_KERNEL */ -void ksort(caddr_t v, int n, int s, int (*f)()); -int cmp2acls(void *a, void *b); -int acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count); -void acl_trivial_access_masks(mode_t mode, boolean_t isdir, - trivial_acl_t *masks); - -#ifdef __cplusplus -} -#endif - -#endif /* _ACL_COMMON_H */ diff --git a/sys/cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S b/sys/cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S deleted file mode 100644 index bc21e85878df..000000000000 --- a/sys/cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S +++ /dev/null @@ -1,133 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - .file "atomic.s" - -#define _ASM -#include <sys/asm_linkage.h> - - /* - * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever - * separated, it is important to edit the libc i386 platform - * specific mapfile and remove the NODYNSORT attribute - * from atomic_dec_64_nv. 
- */ - ENTRY(atomic_dec_64) - ALTENTRY(atomic_dec_64_nv) - pushl %edi - pushl %ebx - movl 12(%esp), %edi // %edi = target address - movl (%edi), %eax - movl 4(%edi), %edx // %edx:%eax = old value -1: - xorl %ebx, %ebx - xorl %ecx, %ecx - not %ecx - not %ebx // %ecx:%ebx = -1 - addl %eax, %ebx - adcl %edx, %ecx // add in the carry from inc - lock - cmpxchg8b (%edi) // try to stick it in - jne 1b - movl %ebx, %eax - movl %ecx, %edx // return new value - popl %ebx - popl %edi - ret - SET_SIZE(atomic_dec_64_nv) - SET_SIZE(atomic_dec_64) - - /* - * NOTE: If atomic_add_64 and atomic_add_64_nv are ever - * separated, it is important to edit the libc i386 platform - * specific mapfile and remove the NODYNSORT attribute - * from atomic_add_64_nv. - */ - ENTRY(atomic_add_64) - ALTENTRY(atomic_add_64_nv) - pushl %edi - pushl %ebx - movl 12(%esp), %edi // %edi = target address - movl (%edi), %eax - movl 4(%edi), %edx // %edx:%eax = old value -1: - movl 16(%esp), %ebx - movl 20(%esp), %ecx // %ecx:%ebx = delta - addl %eax, %ebx - adcl %edx, %ecx // %ecx:%ebx = new value - lock - cmpxchg8b (%edi) // try to stick it in - jne 1b - movl %ebx, %eax - movl %ecx, %edx // return new value - popl %ebx - popl %edi - ret - SET_SIZE(atomic_add_64_nv) - SET_SIZE(atomic_add_64) - - ENTRY(atomic_cas_64) - pushl %ebx - pushl %esi - movl 12(%esp), %esi - movl 16(%esp), %eax - movl 20(%esp), %edx - movl 24(%esp), %ebx - movl 28(%esp), %ecx - lock - cmpxchg8b (%esi) - popl %esi - popl %ebx - ret - SET_SIZE(atomic_cas_64) - - ENTRY(atomic_swap_64) - pushl %esi - pushl %ebx - movl 12(%esp), %esi - movl 16(%esp), %ebx - movl 20(%esp), %ecx - movl (%esi), %eax - movl 4(%esi), %edx // %edx:%eax = old value -1: - lock - cmpxchg8b (%esi) - jne 1b - popl %ebx - popl %esi - ret - SET_SIZE(atomic_swap_64) - - ENTRY(atomic_load_64) - pushl %esi - movl 8(%esp), %esi - movl %ebx, %eax // make old and new values equal, so that - movl %ecx, %edx // destination is never changed - lock - cmpxchg8b (%esi) - popl 
%esi - ret - SET_SIZE(atomic_load_64) diff --git a/sys/cddl/contrib/opensolaris/common/avl/avl.c b/sys/cddl/contrib/opensolaris/common/avl/avl.c deleted file mode 100644 index 2349aba2bf3e..000000000000 --- a/sys/cddl/contrib/opensolaris/common/avl/avl.c +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2014 by Delphix. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - */ - -/* - * AVL - generic AVL tree implementation for kernel use - * - * A complete description of AVL trees can be found in many CS textbooks. - * - * Here is a very brief overview. An AVL tree is a binary search tree that is - * almost perfectly balanced. By "almost" perfectly balanced, we mean that at - * any given node, the left and right subtrees are allowed to differ in height - * by at most 1 level. - * - * This relaxation from a perfectly balanced binary tree allows doing - * insertion and deletion relatively efficiently. Searching the tree is - * still a fast operation, roughly O(log(N)). 
- * - * The key to insertion and deletion is a set of tree manipulations called - * rotations, which bring unbalanced subtrees back into the semi-balanced state. - * - * This implementation of AVL trees has the following peculiarities: - * - * - The AVL specific data structures are physically embedded as fields - * in the "using" data structures. To maintain generality the code - * must constantly translate between "avl_node_t *" and containing - * data structure "void *"s by adding/subtracting the avl_offset. - * - * - Since the AVL data is always embedded in other structures, there is - * no locking or memory allocation in the AVL routines. This must be - * provided for by the enclosing data structure's semantics. Typically, - * avl_insert()/_add()/_remove()/avl_insert_here() require some kind of - * exclusive write lock. Other operations require a read lock. - * - * - The implementation uses iteration instead of explicit recursion, - * since it is intended to run on limited size kernel stacks. Since - * there is no recursion stack present to move "up" in the tree, - * there is an explicit "parent" link in the avl_node_t. - * - * - The left/right children pointers of a node are in an array. - * In the code, variables (instead of constants) are used to represent - * left and right indices. The implementation is written as if it only - * dealt with left handed manipulations. By changing the value assigned - * to "left", the code also works for right handed trees. The - * following variables/terms are frequently used: - * - * int left; // 0 when dealing with left children, - * // 1 for dealing with right children - * - * int left_heavy; // -1 when left subtree is taller at some node, - * // +1 when right subtree is taller - * - * int right; // will be the opposite of left (0 or 1) - * int right_heavy;// will be the opposite of left_heavy (-1 or 1) - * - * int direction; // 0 for "<" (ie. 
left child); 1 for ">" (right) - * - * Though it is a little more confusing to read the code, the approach - * allows using half as much code (and hence cache footprint) for tree - * manipulations and eliminates many conditional branches. - * - * - The avl_index_t is an opaque "cookie" used to find nodes at or - * adjacent to where a new value would be inserted in the tree. The value - * is a modified "avl_node_t *". The bottom bit (normally 0 for a - * pointer) is set to indicate if that the new node has a value greater - * than the value of the indicated "avl_node_t *". - * - * Note - in addition to userland (e.g. libavl and libutil) and the kernel - * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module, - * which each have their own compilation environments and subsequent - * requirements. Each of these environments must be considered when adding - * dependencies from avl.c. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/stdint.h> -#include <sys/debug.h> -#include <sys/avl.h> - -/* - * Small arrays to translate between balance (or diff) values and child indices. - * - * Code that deals with binary tree data structures will randomly use - * left and right children when examining a tree. C "if()" statements - * which evaluate randomly suffer from very poor hardware branch prediction. - * In this code we avoid some of the branch mispredictions by using the - * following translation arrays. They replace random branches with an - * additional memory reference. Since the translation arrays are both very - * small the data should remain efficiently in cache. - */ -static const int avl_child2balance[2] = {-1, 1}; -static const int avl_balance2child[] = {0, 0, 1}; - - -/* - * Walk from one node to the previous valued node (ie. an infix walk - * towards the left). At any given node we do one of 2 things: - * - * - If there is a left child, go to it, then to it's rightmost descendant. 
- * - * - otherwise we return through parent nodes until we've come from a right - * child. - * - * Return Value: - * NULL - if at the end of the nodes - * otherwise next node - */ -void * -avl_walk(avl_tree_t *tree, void *oldnode, int left) -{ - size_t off = tree->avl_offset; - avl_node_t *node = AVL_DATA2NODE(oldnode, off); - int right = 1 - left; - int was_child; - - - /* - * nowhere to walk to if tree is empty - */ - if (node == NULL) - return (NULL); - - /* - * Visit the previous valued node. There are two possibilities: - * - * If this node has a left child, go down one left, then all - * the way right. - */ - if (node->avl_child[left] != NULL) { - for (node = node->avl_child[left]; - node->avl_child[right] != NULL; - node = node->avl_child[right]) - ; - /* - * Otherwise, return thru left children as far as we can. - */ - } else { - for (;;) { - was_child = AVL_XCHILD(node); - node = AVL_XPARENT(node); - if (node == NULL) - return (NULL); - if (was_child == right) - break; - } - } - - return (AVL_NODE2DATA(node, off)); -} - -/* - * Return the lowest valued node in a tree or NULL. - * (leftmost child from root of tree) - */ -void * -avl_first(avl_tree_t *tree) -{ - avl_node_t *node; - avl_node_t *prev = NULL; - size_t off = tree->avl_offset; - - for (node = tree->avl_root; node != NULL; node = node->avl_child[0]) - prev = node; - - if (prev != NULL) - return (AVL_NODE2DATA(prev, off)); - return (NULL); -} - -/* - * Return the highest valued node in a tree or NULL. - * (rightmost child from root of tree) - */ -void * -avl_last(avl_tree_t *tree) -{ - avl_node_t *node; - avl_node_t *prev = NULL; - size_t off = tree->avl_offset; - - for (node = tree->avl_root; node != NULL; node = node->avl_child[1]) - prev = node; - - if (prev != NULL) - return (AVL_NODE2DATA(prev, off)); - return (NULL); -} - -/* - * Access the node immediately before or after an insertion point. 
- * - * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child - * - * Return value: - * NULL: no node in the given direction - * "void *" of the found tree node - */ -void * -avl_nearest(avl_tree_t *tree, avl_index_t where, int direction) -{ - int child = AVL_INDEX2CHILD(where); - avl_node_t *node = AVL_INDEX2NODE(where); - void *data; - size_t off = tree->avl_offset; - - if (node == NULL) { - ASSERT(tree->avl_root == NULL); - return (NULL); - } - data = AVL_NODE2DATA(node, off); - if (child != direction) - return (data); - - return (avl_walk(tree, data, direction)); -} - - -/* - * Search for the node which contains "value". The algorithm is a - * simple binary tree search. - * - * return value: - * NULL: the value is not in the AVL tree - * *where (if not NULL) is set to indicate the insertion point - * "void *" of the found tree node - */ -void * -avl_find(avl_tree_t *tree, const void *value, avl_index_t *where) -{ - avl_node_t *node; - avl_node_t *prev = NULL; - int child = 0; - int diff; - size_t off = tree->avl_offset; - - for (node = tree->avl_root; node != NULL; - node = node->avl_child[child]) { - - prev = node; - - diff = tree->avl_compar(value, AVL_NODE2DATA(node, off)); - ASSERT(-1 <= diff && diff <= 1); - if (diff == 0) { -#ifdef DEBUG - if (where != NULL) - *where = 0; -#endif - return (AVL_NODE2DATA(node, off)); - } - child = avl_balance2child[1 + diff]; - - } - - if (where != NULL) - *where = AVL_MKINDEX(prev, child); - - return (NULL); -} - - -/* - * Perform a rotation to restore balance at the subtree given by depth. - * - * This routine is used by both insertion and deletion. The return value - * indicates: - * 0 : subtree did not change height - * !0 : subtree was reduced in height - * - * The code is written as if handling left rotations, right rotations are - * symmetric and handled by swapping values of variables right/left[_heavy] - * - * On input balance is the "new" balance at "node". This value is either - * -2 or +2. 
- */ -static int -avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance) -{ - int left = !(balance < 0); /* when balance = -2, left will be 0 */ - int right = 1 - left; - int left_heavy = balance >> 1; - int right_heavy = -left_heavy; - avl_node_t *parent = AVL_XPARENT(node); - avl_node_t *child = node->avl_child[left]; - avl_node_t *cright; - avl_node_t *gchild; - avl_node_t *gright; - avl_node_t *gleft; - int which_child = AVL_XCHILD(node); - int child_bal = AVL_XBALANCE(child); - - /* BEGIN CSTYLED */ - /* - * case 1 : node is overly left heavy, the left child is balanced or - * also left heavy. This requires the following rotation. - * - * (node bal:-2) - * / \ - * / \ - * (child bal:0 or -1) - * / \ - * / \ - * cright - * - * becomes: - * - * (child bal:1 or 0) - * / \ - * / \ - * (node bal:-1 or 0) - * / \ - * / \ - * cright - * - * we detect this situation by noting that child's balance is not - * right_heavy. - */ - /* END CSTYLED */ - if (child_bal != right_heavy) { - - /* - * compute new balance of nodes - * - * If child used to be left heavy (now balanced) we reduced - * the height of this sub-tree -- used in "return...;" below - */ - child_bal += right_heavy; /* adjust towards right */ - - /* - * move "cright" to be node's left child - */ - cright = child->avl_child[right]; - node->avl_child[left] = cright; - if (cright != NULL) { - AVL_SETPARENT(cright, node); - AVL_SETCHILD(cright, left); - } - - /* - * move node to be child's right child - */ - child->avl_child[right] = node; - AVL_SETBALANCE(node, -child_bal); - AVL_SETCHILD(node, right); - AVL_SETPARENT(node, child); - - /* - * update the pointer into this subtree - */ - AVL_SETBALANCE(child, child_bal); - AVL_SETCHILD(child, which_child); - AVL_SETPARENT(child, parent); - if (parent != NULL) - parent->avl_child[which_child] = child; - else - tree->avl_root = child; - - return (child_bal == 0); - } - - /* BEGIN CSTYLED */ - /* - * case 2 : When node is left heavy, but child is right heavy we 
use - * a different rotation. - * - * (node b:-2) - * / \ - * / \ - * / \ - * (child b:+1) - * / \ - * / \ - * (gchild b: != 0) - * / \ - * / \ - * gleft gright - * - * becomes: - * - * (gchild b:0) - * / \ - * / \ - * / \ - * (child b:?) (node b:?) - * / \ / \ - * / \ / \ - * gleft gright - * - * computing the new balances is more complicated. As an example: - * if gchild was right_heavy, then child is now left heavy - * else it is balanced - */ - /* END CSTYLED */ - gchild = child->avl_child[right]; - gleft = gchild->avl_child[left]; - gright = gchild->avl_child[right]; - - /* - * move gright to left child of node and - * - * move gleft to right child of node - */ - node->avl_child[left] = gright; - if (gright != NULL) { - AVL_SETPARENT(gright, node); - AVL_SETCHILD(gright, left); - } - - child->avl_child[right] = gleft; - if (gleft != NULL) { - AVL_SETPARENT(gleft, child); - AVL_SETCHILD(gleft, right); - } - - /* - * move child to left child of gchild and - * - * move node to right child of gchild and - * - * fixup parent of all this to point to gchild - */ - balance = AVL_XBALANCE(gchild); - gchild->avl_child[left] = child; - AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0)); - AVL_SETPARENT(child, gchild); - AVL_SETCHILD(child, left); - - gchild->avl_child[right] = node; - AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0)); - AVL_SETPARENT(node, gchild); - AVL_SETCHILD(node, right); - - AVL_SETBALANCE(gchild, 0); - AVL_SETPARENT(gchild, parent); - AVL_SETCHILD(gchild, which_child); - if (parent != NULL) - parent->avl_child[which_child] = gchild; - else - tree->avl_root = gchild; - - return (1); /* the new tree is always shorter */ -} - - -/* - * Insert a new node into an AVL tree at the specified (from avl_find()) place. - * - * Newly inserted nodes are always leaf nodes in the tree, since avl_find() - * searches out to the leaf positions. The avl_index_t indicates the node - * which will be the parent of the new node. 
- * - * After the node is inserted, a single rotation further up the tree may - * be necessary to maintain an acceptable AVL balance. - */ -void -avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where) -{ - avl_node_t *node; - avl_node_t *parent = AVL_INDEX2NODE(where); - int old_balance; - int new_balance; - int which_child = AVL_INDEX2CHILD(where); - size_t off = tree->avl_offset; - - ASSERT(tree); -#ifdef _LP64 - ASSERT(((uintptr_t)new_data & 0x7) == 0); -#endif - - node = AVL_DATA2NODE(new_data, off); - - /* - * First, add the node to the tree at the indicated position. - */ - ++tree->avl_numnodes; - - node->avl_child[0] = NULL; - node->avl_child[1] = NULL; - - AVL_SETCHILD(node, which_child); - AVL_SETBALANCE(node, 0); - AVL_SETPARENT(node, parent); - if (parent != NULL) { - ASSERT(parent->avl_child[which_child] == NULL); - parent->avl_child[which_child] = node; - } else { - ASSERT(tree->avl_root == NULL); - tree->avl_root = node; - } - /* - * Now, back up the tree modifying the balance of all nodes above the - * insertion point. If we get to a highly unbalanced ancestor, we - * need to do a rotation. If we back out of the tree we are done. - * If we brought any subtree into perfect balance (0), we are also done. - */ - for (;;) { - node = parent; - if (node == NULL) - return; - - /* - * Compute the new balance - */ - old_balance = AVL_XBALANCE(node); - new_balance = old_balance + avl_child2balance[which_child]; - - /* - * If we introduced equal balance, then we are done immediately - */ - if (new_balance == 0) { - AVL_SETBALANCE(node, 0); - return; - } - - /* - * If both old and new are not zero we went - * from -1 to -2 balance, do a rotation. 
- */ - if (old_balance != 0) - break; - - AVL_SETBALANCE(node, new_balance); - parent = AVL_XPARENT(node); - which_child = AVL_XCHILD(node); - } - - /* - * perform a rotation to fix the tree and return - */ - (void) avl_rotation(tree, node, new_balance); -} - -/* - * Insert "new_data" in "tree" in the given "direction" either after or - * before (AVL_AFTER, AVL_BEFORE) the data "here". - * - * Insertions can only be done at empty leaf points in the tree, therefore - * if the given child of the node is already present we move to either - * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since - * every other node in the tree is a leaf, this always works. - * - * To help developers using this interface, we assert that the new node - * is correctly ordered at every step of the way in DEBUG kernels. - */ -void -avl_insert_here( - avl_tree_t *tree, - void *new_data, - void *here, - int direction) -{ - avl_node_t *node; - int child = direction; /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */ -#ifdef DEBUG - int diff; -#endif - - ASSERT(tree != NULL); - ASSERT(new_data != NULL); - ASSERT(here != NULL); - ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER); - - /* - * If corresponding child of node is not NULL, go to the neighboring - * node and reverse the insertion direction. - */ - node = AVL_DATA2NODE(here, tree->avl_offset); - -#ifdef DEBUG - diff = tree->avl_compar(new_data, here); - ASSERT(-1 <= diff && diff <= 1); - ASSERT(diff != 0); - ASSERT(diff > 0 ? child == 1 : child == 0); -#endif - - if (node->avl_child[child] != NULL) { - node = node->avl_child[child]; - child = 1 - child; - while (node->avl_child[child] != NULL) { -#ifdef DEBUG - diff = tree->avl_compar(new_data, - AVL_NODE2DATA(node, tree->avl_offset)); - ASSERT(-1 <= diff && diff <= 1); - ASSERT(diff != 0); - ASSERT(diff > 0 ? 
child == 1 : child == 0); -#endif - node = node->avl_child[child]; - } -#ifdef DEBUG - diff = tree->avl_compar(new_data, - AVL_NODE2DATA(node, tree->avl_offset)); - ASSERT(-1 <= diff && diff <= 1); - ASSERT(diff != 0); - ASSERT(diff > 0 ? child == 1 : child == 0); -#endif - } - ASSERT(node->avl_child[child] == NULL); - - avl_insert(tree, new_data, AVL_MKINDEX(node, child)); -} - -/* - * Add a new node to an AVL tree. - */ -void -avl_add(avl_tree_t *tree, void *new_node) -{ - avl_index_t where; - - /* - * This is unfortunate. We want to call panic() here, even for - * non-DEBUG kernels. In userland, however, we can't depend on anything - * in libc or else the rtld build process gets confused. - * Thankfully, rtld provides us with its own assfail() so we can use - * that here. We use assfail() directly to get a nice error message - * in the core - much like what panic() does for crashdumps. - */ - if (avl_find(tree, new_node, &where) != NULL) -#ifdef _KERNEL - panic("avl_find() succeeded inside avl_add()"); -#else - (void) assfail("avl_find() succeeded inside avl_add()", - __FILE__, __LINE__); -#endif - avl_insert(tree, new_node, where); -} - -/* - * Delete a node from the AVL tree. Deletion is similar to insertion, but - * with 2 complications. - * - * First, we may be deleting an interior node. Consider the following subtree: - * - * d c c - * / \ / \ / \ - * b e b e b e - * / \ / \ / - * a c a a - * - * When we are deleting node (d), we find and bring up an adjacent valued leaf - * node, say (c), to take the interior node's place. In the code this is - * handled by temporarily swapping (d) and (c) in the tree and then using - * common code to delete (d) from the leaf position. - * - * Secondly, an interior deletion from a deep tree may require more than one - * rotation to fix the balance. This is handled by moving up the tree through - * parents and applying rotations as needed. 
The return value from - * avl_rotation() is used to detect when a subtree did not change overall - * height due to a rotation. - */ -void -avl_remove(avl_tree_t *tree, void *data) -{ - avl_node_t *delete; - avl_node_t *parent; - avl_node_t *node; - avl_node_t tmp; - int old_balance; - int new_balance; - int left; - int right; - int which_child; - size_t off = tree->avl_offset; - - ASSERT(tree); - - delete = AVL_DATA2NODE(data, off); - - /* - * Deletion is easiest with a node that has at most 1 child. - * We swap a node with 2 children with a sequentially valued - * neighbor node. That node will have at most 1 child. Note this - * has no effect on the ordering of the remaining nodes. - * - * As an optimization, we choose the greater neighbor if the tree - * is right heavy, otherwise the left neighbor. This reduces the - * number of rotations needed. - */ - if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) { - - /* - * choose node to swap from whichever side is taller - */ - old_balance = AVL_XBALANCE(delete); - left = avl_balance2child[old_balance + 1]; - right = 1 - left; - - /* - * get to the previous value'd node - * (down 1 left, as far as possible right) - */ - for (node = delete->avl_child[left]; - node->avl_child[right] != NULL; - node = node->avl_child[right]) - ; - - /* - * create a temp placeholder for 'node' - * move 'node' to delete's spot in the tree - */ - tmp = *node; - - *node = *delete; - if (node->avl_child[left] == node) - node->avl_child[left] = &tmp; - - parent = AVL_XPARENT(node); - if (parent != NULL) - parent->avl_child[AVL_XCHILD(node)] = node; - else - tree->avl_root = node; - AVL_SETPARENT(node->avl_child[left], node); - AVL_SETPARENT(node->avl_child[right], node); - - /* - * Put tmp where node used to be (just temporary). - * It always has a parent and at most 1 child. 
- */ - delete = &tmp; - parent = AVL_XPARENT(delete); - parent->avl_child[AVL_XCHILD(delete)] = delete; - which_child = (delete->avl_child[1] != 0); - if (delete->avl_child[which_child] != NULL) - AVL_SETPARENT(delete->avl_child[which_child], delete); - } - - - /* - * Here we know "delete" is at least partially a leaf node. It can - * be easily removed from the tree. - */ - ASSERT(tree->avl_numnodes > 0); - --tree->avl_numnodes; - parent = AVL_XPARENT(delete); - which_child = AVL_XCHILD(delete); - if (delete->avl_child[0] != NULL) - node = delete->avl_child[0]; - else - node = delete->avl_child[1]; - - /* - * Connect parent directly to node (leaving out delete). - */ - if (node != NULL) { - AVL_SETPARENT(node, parent); - AVL_SETCHILD(node, which_child); - } - if (parent == NULL) { - tree->avl_root = node; - return; - } - parent->avl_child[which_child] = node; - - - /* - * Since the subtree is now shorter, begin adjusting parent balances - * and performing any needed rotations. - */ - do { - - /* - * Move up the tree and adjust the balance - * - * Capture the parent and which_child values for the next - * iteration before any rotations occur. - */ - node = parent; - old_balance = AVL_XBALANCE(node); - new_balance = old_balance - avl_child2balance[which_child]; - parent = AVL_XPARENT(node); - which_child = AVL_XCHILD(node); - - /* - * If a node was in perfect balance but isn't anymore then - * we can stop, since the height didn't change above this point - * due to a deletion. - */ - if (old_balance == 0) { - AVL_SETBALANCE(node, new_balance); - break; - } - - /* - * If the new balance is zero, we don't need to rotate - * else - * need a rotation to fix the balance. - * If the rotation doesn't change the height - * of the sub-tree we have finished adjusting. 
- */ - if (new_balance == 0) - AVL_SETBALANCE(node, new_balance); - else if (!avl_rotation(tree, node, new_balance)) - break; - } while (parent != NULL); -} - -#define AVL_REINSERT(tree, obj) \ - avl_remove((tree), (obj)); \ - avl_add((tree), (obj)) - -boolean_t -avl_update_lt(avl_tree_t *t, void *obj) -{ - void *neighbor; - - ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) || - (t->avl_compar(obj, neighbor) <= 0)); - - neighbor = AVL_PREV(t, obj); - if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) { - AVL_REINSERT(t, obj); - return (B_TRUE); - } - - return (B_FALSE); -} - -boolean_t -avl_update_gt(avl_tree_t *t, void *obj) -{ - void *neighbor; - - ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) || - (t->avl_compar(obj, neighbor) >= 0)); - - neighbor = AVL_NEXT(t, obj); - if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) { - AVL_REINSERT(t, obj); - return (B_TRUE); - } - - return (B_FALSE); -} - -boolean_t -avl_update(avl_tree_t *t, void *obj) -{ - void *neighbor; - - neighbor = AVL_PREV(t, obj); - if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) { - AVL_REINSERT(t, obj); - return (B_TRUE); - } - - neighbor = AVL_NEXT(t, obj); - if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) { - AVL_REINSERT(t, obj); - return (B_TRUE); - } - - return (B_FALSE); -} - -void -avl_swap(avl_tree_t *tree1, avl_tree_t *tree2) -{ - avl_node_t *temp_node; - ulong_t temp_numnodes; - - ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar); - ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset); - ASSERT3U(tree1->avl_size, ==, tree2->avl_size); - - temp_node = tree1->avl_root; - temp_numnodes = tree1->avl_numnodes; - tree1->avl_root = tree2->avl_root; - tree1->avl_numnodes = tree2->avl_numnodes; - tree2->avl_root = temp_node; - tree2->avl_numnodes = temp_numnodes; -} - -/* - * initialize a new AVL tree - */ -void -avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *), - size_t size, size_t offset) -{ - ASSERT(tree); - 
ASSERT(compar); - ASSERT(size > 0); - ASSERT(size >= offset + sizeof (avl_node_t)); -#ifdef _LP64 - ASSERT((offset & 0x7) == 0); -#endif - - tree->avl_compar = compar; - tree->avl_root = NULL; - tree->avl_numnodes = 0; - tree->avl_size = size; - tree->avl_offset = offset; -} - -/* - * Delete a tree. - */ -/* ARGSUSED */ -void -avl_destroy(avl_tree_t *tree) -{ - ASSERT(tree); - ASSERT(tree->avl_numnodes == 0); - ASSERT(tree->avl_root == NULL); -} - - -/* - * Return the number of nodes in an AVL tree. - */ -ulong_t -avl_numnodes(avl_tree_t *tree) -{ - ASSERT(tree); - return (tree->avl_numnodes); -} - -boolean_t -avl_is_empty(avl_tree_t *tree) -{ - ASSERT(tree); - return (tree->avl_numnodes == 0); -} - -#define CHILDBIT (1L) - -/* - * Post-order tree walk used to visit all tree nodes and destroy the tree - * in post order. This is used for destroying a tree without paying any cost - * for rebalancing it. - * - * example: - * - * void *cookie = NULL; - * my_data_t *node; - * - * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) - * free(node); - * avl_destroy(tree); - * - * The cookie is really an avl_node_t to the current node's parent and - * an indication of which child you looked at last. - * - * On input, a cookie value of CHILDBIT indicates the tree is done. - */ -void * -avl_destroy_nodes(avl_tree_t *tree, void **cookie) -{ - avl_node_t *node; - avl_node_t *parent; - int child; - void *first; - size_t off = tree->avl_offset; - - /* - * Initial calls go to the first node or it's right descendant. - */ - if (*cookie == NULL) { - first = avl_first(tree); - - /* - * deal with an empty tree - */ - if (first == NULL) { - *cookie = (void *)CHILDBIT; - return (NULL); - } - - node = AVL_DATA2NODE(first, off); - parent = AVL_XPARENT(node); - goto check_right_side; - } - - /* - * If there is no parent to return to we are done. 
- */ - parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT); - if (parent == NULL) { - if (tree->avl_root != NULL) { - ASSERT(tree->avl_numnodes == 1); - tree->avl_root = NULL; - tree->avl_numnodes = 0; - } - return (NULL); - } - - /* - * Remove the child pointer we just visited from the parent and tree. - */ - child = (uintptr_t)(*cookie) & CHILDBIT; - parent->avl_child[child] = NULL; - ASSERT(tree->avl_numnodes > 1); - --tree->avl_numnodes; - - /* - * If we just did a right child or there isn't one, go up to parent. - */ - if (child == 1 || parent->avl_child[1] == NULL) { - node = parent; - parent = AVL_XPARENT(parent); - goto done; - } - - /* - * Do parent's right child, then leftmost descendent. - */ - node = parent->avl_child[1]; - while (node->avl_child[0] != NULL) { - parent = node; - node = node->avl_child[0]; - } - - /* - * If here, we moved to a left child. It may have one - * child on the right (when balance == +1). - */ -check_right_side: - if (node->avl_child[1] != NULL) { - ASSERT(AVL_XBALANCE(node) == 1); - parent = node; - node = node->avl_child[1]; - ASSERT(node->avl_child[0] == NULL && - node->avl_child[1] == NULL); - } else { - ASSERT(AVL_XBALANCE(node) <= 0); - } - -done: - if (parent == NULL) { - *cookie = (void *)CHILDBIT; - ASSERT(node == tree->avl_root); - } else { - *cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node)); - } - - return (AVL_NODE2DATA(node, off)); -} diff --git a/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_fnvpair.c b/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_fnvpair.c deleted file mode 100644 index eb200a24e6d2..000000000000 --- a/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_fnvpair.c +++ /dev/null @@ -1,512 +0,0 @@ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -#include <sys/nvpair.h> -#ifndef _KERNEL -#include <sys/zfs_context.h> -#else -#include <sys/debug.h> -#include <sys/kmem.h> -#include <sys/param.h> -#include <sys/debug.h> -#endif - -/* - * "Force" nvlist wrapper. - * - * These functions wrap the nvlist_* functions with assertions that assume - * the operation is successful. This allows the caller's code to be much - * more readable, especially for the fnvlist_lookup_* and fnvpair_value_* - * functions, which can return the requested value (rather than filling in - * a pointer). - * - * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate - * with KM_SLEEP. - * - * More wrappers should be added as needed -- for example - * nvlist_lookup_*_array and nvpair_value_*_array. - */ - -nvlist_t * -fnvlist_alloc(void) -{ - nvlist_t *nvl; - VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)); - return (nvl); -} - -void -fnvlist_free(nvlist_t *nvl) -{ - nvlist_free(nvl); -} - -size_t -fnvlist_size(nvlist_t *nvl) -{ - size_t size; - VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_NATIVE)); - return (size); -} - -/* - * Returns allocated buffer of size *sizep. Caller must free the buffer with - * fnvlist_pack_free(). 
- */ -char * -fnvlist_pack(nvlist_t *nvl, size_t *sizep) -{ - char *packed = 0; - VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE, - KM_SLEEP), ==, 0); - return (packed); -} - -/*ARGSUSED*/ -void -fnvlist_pack_free(char *pack, size_t size) -{ -#ifdef _KERNEL - kmem_free(pack, size); -#else - free(pack); -#endif -} - -nvlist_t * -fnvlist_unpack(char *buf, size_t buflen) -{ - nvlist_t *rv; - VERIFY0(nvlist_unpack(buf, buflen, &rv, KM_SLEEP)); - return (rv); -} - -nvlist_t * -fnvlist_dup(nvlist_t *nvl) -{ - nvlist_t *rv; - VERIFY0(nvlist_dup(nvl, &rv, KM_SLEEP)); - return (rv); -} - -void -fnvlist_merge(nvlist_t *dst, nvlist_t *src) -{ - VERIFY0(nvlist_merge(dst, src, KM_SLEEP)); -} - -size_t -fnvlist_num_pairs(nvlist_t *nvl) -{ - size_t count = 0; - nvpair_t *pair; - - for (pair = nvlist_next_nvpair(nvl, 0); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) - count++; - return (count); -} - -void -fnvlist_add_boolean(nvlist_t *nvl, const char *name) -{ - VERIFY0(nvlist_add_boolean(nvl, name)); -} - -void -fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val) -{ - VERIFY0(nvlist_add_boolean_value(nvl, name, val)); -} - -void -fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val) -{ - VERIFY0(nvlist_add_byte(nvl, name, val)); -} - -void -fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val) -{ - VERIFY0(nvlist_add_int8(nvl, name, val)); -} - -void -fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val) -{ - VERIFY0(nvlist_add_uint8(nvl, name, val)); -} - -void -fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val) -{ - VERIFY0(nvlist_add_int16(nvl, name, val)); -} - -void -fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val) -{ - VERIFY0(nvlist_add_uint16(nvl, name, val)); -} - -void -fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val) -{ - VERIFY0(nvlist_add_int32(nvl, name, val)); -} - -void -fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val) -{ - 
VERIFY0(nvlist_add_uint32(nvl, name, val)); -} - -void -fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val) -{ - VERIFY0(nvlist_add_int64(nvl, name, val)); -} - -void -fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val) -{ - VERIFY0(nvlist_add_uint64(nvl, name, val)); -} - -void -fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val) -{ - VERIFY0(nvlist_add_string(nvl, name, val)); -} - -void -fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) -{ - VERIFY0(nvlist_add_nvlist(nvl, name, val)); -} - -void -fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair) -{ - VERIFY0(nvlist_add_nvpair(nvl, pair)); -} - -void -fnvlist_add_boolean_array(nvlist_t *nvl, const char *name, - boolean_t *val, uint_t n) -{ - VERIFY0(nvlist_add_boolean_array(nvl, name, val, n)); -} - -void -fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n) -{ - VERIFY0(nvlist_add_byte_array(nvl, name, val, n)); -} - -void -fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n) -{ - VERIFY0(nvlist_add_int8_array(nvl, name, val, n)); -} - -void -fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n) -{ - VERIFY0(nvlist_add_uint8_array(nvl, name, val, n)); -} - -void -fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n) -{ - VERIFY0(nvlist_add_int16_array(nvl, name, val, n)); -} - -void -fnvlist_add_uint16_array(nvlist_t *nvl, const char *name, - uint16_t *val, uint_t n) -{ - VERIFY0(nvlist_add_uint16_array(nvl, name, val, n)); -} - -void -fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n) -{ - VERIFY0(nvlist_add_int32_array(nvl, name, val, n)); -} - -void -fnvlist_add_uint32_array(nvlist_t *nvl, const char *name, - uint32_t *val, uint_t n) -{ - VERIFY0(nvlist_add_uint32_array(nvl, name, val, n)); -} - -void -fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n) -{ - 
VERIFY0(nvlist_add_int64_array(nvl, name, val, n)); -} - -void -fnvlist_add_uint64_array(nvlist_t *nvl, const char *name, - uint64_t *val, uint_t n) -{ - VERIFY0(nvlist_add_uint64_array(nvl, name, val, n)); -} - -void -fnvlist_add_string_array(nvlist_t *nvl, const char *name, - char * const *val, uint_t n) -{ - VERIFY0(nvlist_add_string_array(nvl, name, val, n)); -} - -void -fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name, - nvlist_t **val, uint_t n) -{ - VERIFY0(nvlist_add_nvlist_array(nvl, name, val, n)); -} - -void -fnvlist_remove(nvlist_t *nvl, const char *name) -{ - VERIFY0(nvlist_remove_all(nvl, name)); -} - -void -fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair) -{ - VERIFY0(nvlist_remove_nvpair(nvl, pair)); -} - -nvpair_t * -fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name) -{ - nvpair_t *rv; - VERIFY0(nvlist_lookup_nvpair(nvl, name, &rv)); - return (rv); -} - -/* returns B_TRUE if the entry exists */ -boolean_t -fnvlist_lookup_boolean(nvlist_t *nvl, const char *name) -{ - return (nvlist_lookup_boolean(nvl, name) == 0); -} - -boolean_t -fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name) -{ - boolean_t rv; - VERIFY0(nvlist_lookup_boolean_value(nvl, name, &rv)); - return (rv); -} - -uchar_t -fnvlist_lookup_byte(nvlist_t *nvl, const char *name) -{ - uchar_t rv; - VERIFY0(nvlist_lookup_byte(nvl, name, &rv)); - return (rv); -} - -int8_t -fnvlist_lookup_int8(nvlist_t *nvl, const char *name) -{ - int8_t rv; - VERIFY0(nvlist_lookup_int8(nvl, name, &rv)); - return (rv); -} - -int16_t -fnvlist_lookup_int16(nvlist_t *nvl, const char *name) -{ - int16_t rv; - VERIFY0(nvlist_lookup_int16(nvl, name, &rv)); - return (rv); -} - -int32_t -fnvlist_lookup_int32(nvlist_t *nvl, const char *name) -{ - int32_t rv; - VERIFY0(nvlist_lookup_int32(nvl, name, &rv)); - return (rv); -} - -int64_t -fnvlist_lookup_int64(nvlist_t *nvl, const char *name) -{ - int64_t rv; - VERIFY0(nvlist_lookup_int64(nvl, name, &rv)); - return (rv); -} - -uint8_t 
-fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name) -{ - uint8_t rv; - VERIFY0(nvlist_lookup_uint8(nvl, name, &rv)); - return (rv); -} - -uint16_t -fnvlist_lookup_uint16(nvlist_t *nvl, const char *name) -{ - uint16_t rv; - VERIFY0(nvlist_lookup_uint16(nvl, name, &rv)); - return (rv); -} - -uint32_t -fnvlist_lookup_uint32(nvlist_t *nvl, const char *name) -{ - uint32_t rv; - VERIFY0(nvlist_lookup_uint32(nvl, name, &rv)); - return (rv); -} - -uint64_t -fnvlist_lookup_uint64(nvlist_t *nvl, const char *name) -{ - uint64_t rv; - VERIFY0(nvlist_lookup_uint64(nvl, name, &rv)); - return (rv); -} - -char * -fnvlist_lookup_string(nvlist_t *nvl, const char *name) -{ - char *rv; - VERIFY0(nvlist_lookup_string(nvl, name, &rv)); - return (rv); -} - -nvlist_t * -fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name) -{ - nvlist_t *rv; - VERIFY0(nvlist_lookup_nvlist(nvl, name, &rv)); - return (rv); -} - -boolean_t -fnvpair_value_boolean_value(nvpair_t *nvp) -{ - boolean_t rv; - VERIFY0(nvpair_value_boolean_value(nvp, &rv)); - return (rv); -} - -uchar_t -fnvpair_value_byte(nvpair_t *nvp) -{ - uchar_t rv; - VERIFY0(nvpair_value_byte(nvp, &rv)); - return (rv); -} - -int8_t -fnvpair_value_int8(nvpair_t *nvp) -{ - int8_t rv; - VERIFY0(nvpair_value_int8(nvp, &rv)); - return (rv); -} - -int16_t -fnvpair_value_int16(nvpair_t *nvp) -{ - int16_t rv; - VERIFY0(nvpair_value_int16(nvp, &rv)); - return (rv); -} - -int32_t -fnvpair_value_int32(nvpair_t *nvp) -{ - int32_t rv; - VERIFY0(nvpair_value_int32(nvp, &rv)); - return (rv); -} - -int64_t -fnvpair_value_int64(nvpair_t *nvp) -{ - int64_t rv; - VERIFY0(nvpair_value_int64(nvp, &rv)); - return (rv); -} - -uint8_t -fnvpair_value_uint8_t(nvpair_t *nvp) -{ - uint8_t rv; - VERIFY0(nvpair_value_uint8(nvp, &rv)); - return (rv); -} - -uint16_t -fnvpair_value_uint16(nvpair_t *nvp) -{ - uint16_t rv; - VERIFY0(nvpair_value_uint16(nvp, &rv)); - return (rv); -} - -uint32_t -fnvpair_value_uint32(nvpair_t *nvp) -{ - uint32_t rv; - 
VERIFY0(nvpair_value_uint32(nvp, &rv)); - return (rv); -} - -uint64_t -fnvpair_value_uint64(nvpair_t *nvp) -{ - uint64_t rv; - VERIFY0(nvpair_value_uint64(nvp, &rv)); - return (rv); -} - -char * -fnvpair_value_string(nvpair_t *nvp) -{ - char *rv; - VERIFY0(nvpair_value_string(nvp, &rv)); - return (rv); -} - -nvlist_t * -fnvpair_value_nvlist(nvpair_t *nvp) -{ - nvlist_t *rv; - VERIFY0(nvpair_value_nvlist(nvp, &rv)); - return (rv); -} diff --git a/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair.c b/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair.c deleted file mode 100644 index c322a5bd2179..000000000000 --- a/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair.c +++ /dev/null @@ -1,3600 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, 2017 by Delphix. All rights reserved. 
- */ - -#include <sys/debug.h> -#include <sys/nvpair.h> -#include <sys/nvpair_impl.h> -#include <rpc/types.h> -#include <rpc/xdr.h> - -#if defined(_KERNEL) && !defined(_BOOT) -#include <sys/varargs.h> -#include <sys/sunddi.h> -#else -#include <stdarg.h> -#include <stdlib.h> -#include <string.h> -#include <strings.h> -#endif - -#ifndef offsetof -#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) -#endif -#define skip_whitespace(p) while ((*(p) == ' ') || (*(p) == '\t')) p++ - -#if defined(__FreeBSD__) && !defined(_KERNEL) -/* - * libnvpair is the lowest commen denominator for ZFS related libraries, - * defining aok here makes it usable by all ZFS related libraries - */ -int aok; -#endif - -/* - * nvpair.c - Provides kernel & userland interfaces for manipulating - * name-value pairs. - * - * Overview Diagram - * - * +--------------+ - * | nvlist_t | - * |--------------| - * | nvl_version | - * | nvl_nvflag | - * | nvl_priv -+-+ - * | nvl_flag | | - * | nvl_pad | | - * +--------------+ | - * V - * +--------------+ last i_nvp in list - * | nvpriv_t | +---------------------> - * |--------------| | - * +--+- nvp_list | | +------------+ - * | | nvp_last -+--+ + nv_alloc_t | - * | | nvp_curr | |------------| - * | | nvp_nva -+----> | nva_ops | - * | | nvp_stat | | nva_arg | - * | +--------------+ +------------+ - * | - * +-------+ - * V - * +---------------------+ +-------------------+ - * | i_nvp_t | +-->| i_nvp_t | +--> - * |---------------------| | |-------------------| | - * | nvi_next -+--+ | nvi_next -+--+ - * | nvi_prev (NULL) | <----+ nvi_prev | - * | . . . . . . . . . . | | . . . . . . . . . | - * | nvp (nvpair_t) | | nvp (nvpair_t) | - * | - nvp_size | | - nvp_size | - * | - nvp_name_sz | | - nvp_name_sz | - * | - nvp_value_elem | | - nvp_value_elem | - * | - nvp_type | | - nvp_type | - * | - data ... | | - data ... 
| - * +---------------------+ +-------------------+ - * - * - * - * +---------------------+ +---------------------+ - * | i_nvp_t | +--> +-->| i_nvp_t (last) | - * |---------------------| | | |---------------------| - * | nvi_next -+--+ ... --+ | nvi_next (NULL) | - * <-+- nvi_prev |<-- ... <----+ nvi_prev | - * | . . . . . . . . . | | . . . . . . . . . | - * | nvp (nvpair_t) | | nvp (nvpair_t) | - * | - nvp_size | | - nvp_size | - * | - nvp_name_sz | | - nvp_name_sz | - * | - nvp_value_elem | | - nvp_value_elem | - * | - DATA_TYPE_NVLIST | | - nvp_type | - * | - data (embedded) | | - data ... | - * | nvlist name | +---------------------+ - * | +--------------+ | - * | | nvlist_t | | - * | |--------------| | - * | | nvl_version | | - * | | nvl_nvflag | | - * | | nvl_priv --+---+----> - * | | nvl_flag | | - * | | nvl_pad | | - * | +--------------+ | - * +---------------------+ - * - * - * N.B. nvpair_t may be aligned on 4 byte boundary, so +4 will - * allow value to be aligned on 8 byte boundary - * - * name_len is the length of the name string including the null terminator - * so it must be >= 1 - */ -#define NVP_SIZE_CALC(name_len, data_len) \ - (NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len)) - -static int i_get_value_size(data_type_t type, const void *data, uint_t nelem); -static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, - uint_t nelem, const void *data); - -#define NV_STAT_EMBEDDED 0x1 -#define EMBEDDED_NVL(nvp) ((nvlist_t *)(void *)NVP_VALUE(nvp)) -#define EMBEDDED_NVL_ARRAY(nvp) ((nvlist_t **)(void *)NVP_VALUE(nvp)) - -#define NVP_VALOFF(nvp) (NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz)) -#define NVPAIR2I_NVP(nvp) \ - ((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp))) - -#ifdef _KERNEL -int nvpair_max_recursion = 20; -#else -int nvpair_max_recursion = 100; -#endif - -uint64_t nvlist_hashtable_init_size = (1 << 4); - -int -nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...) 
-{ - va_list valist; - int err = 0; - - nva->nva_ops = nvo; - nva->nva_arg = NULL; - - va_start(valist, nvo); - if (nva->nva_ops->nv_ao_init != NULL) - err = nva->nva_ops->nv_ao_init(nva, valist); - va_end(valist); - - return (err); -} - -void -nv_alloc_reset(nv_alloc_t *nva) -{ - if (nva->nva_ops->nv_ao_reset != NULL) - nva->nva_ops->nv_ao_reset(nva); -} - -void -nv_alloc_fini(nv_alloc_t *nva) -{ - if (nva->nva_ops->nv_ao_fini != NULL) - nva->nva_ops->nv_ao_fini(nva); -} - -nv_alloc_t * -nvlist_lookup_nv_alloc(nvlist_t *nvl) -{ - nvpriv_t *priv; - - if (nvl == NULL || - (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return (NULL); - - return (priv->nvp_nva); -} - -static void * -nv_mem_zalloc(nvpriv_t *nvp, size_t size) -{ - nv_alloc_t *nva = nvp->nvp_nva; - void *buf; - - if ((buf = nva->nva_ops->nv_ao_alloc(nva, size)) != NULL) - bzero(buf, size); - - return (buf); -} - -static void -nv_mem_free(nvpriv_t *nvp, void *buf, size_t size) -{ - nv_alloc_t *nva = nvp->nvp_nva; - - nva->nva_ops->nv_ao_free(nva, buf, size); -} - -static void -nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat) -{ - bzero(priv, sizeof (nvpriv_t)); - - priv->nvp_nva = nva; - priv->nvp_stat = stat; -} - -static nvpriv_t * -nv_priv_alloc(nv_alloc_t *nva) -{ - nvpriv_t *priv; - - /* - * nv_mem_alloc() cannot called here because it needs the priv - * argument. - */ - if ((priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t))) == NULL) - return (NULL); - - nv_priv_init(priv, nva, 0); - - return (priv); -} - -/* - * Embedded lists need their own nvpriv_t's. We create a new - * nvpriv_t using the parameters and allocator from the parent - * list's nvpriv_t. 
- */ -static nvpriv_t * -nv_priv_alloc_embedded(nvpriv_t *priv) -{ - nvpriv_t *emb_priv; - - if ((emb_priv = nv_mem_zalloc(priv, sizeof (nvpriv_t))) == NULL) - return (NULL); - - nv_priv_init(emb_priv, priv->nvp_nva, NV_STAT_EMBEDDED); - - return (emb_priv); -} - -static int -nvt_tab_alloc(nvpriv_t *priv, uint64_t buckets) -{ - ASSERT3P(priv->nvp_hashtable, ==, NULL); - ASSERT0(priv->nvp_nbuckets); - ASSERT0(priv->nvp_nentries); - - i_nvp_t **tab = nv_mem_zalloc(priv, buckets * sizeof (i_nvp_t *)); - if (tab == NULL) - return (ENOMEM); - - priv->nvp_hashtable = tab; - priv->nvp_nbuckets = buckets; - return (0); -} - -static void -nvt_tab_free(nvpriv_t *priv) -{ - i_nvp_t **tab = priv->nvp_hashtable; - if (tab == NULL) { - ASSERT0(priv->nvp_nbuckets); - ASSERT0(priv->nvp_nentries); - return; - } - - nv_mem_free(priv, tab, priv->nvp_nbuckets * sizeof (i_nvp_t *)); - - priv->nvp_hashtable = NULL; - priv->nvp_nbuckets = 0; - priv->nvp_nentries = 0; -} - -static uint32_t -nvt_hash(const char *p) -{ - uint32_t g, hval = 0; - - while (*p) { - hval = (hval << 4) + *p++; - if ((g = (hval & 0xf0000000)) != 0) - hval ^= g >> 24; - hval &= ~g; - } - return (hval); -} - -static boolean_t -nvt_nvpair_match(nvpair_t *nvp1, nvpair_t *nvp2, uint32_t nvflag) -{ - boolean_t match = B_FALSE; - if (nvflag & NV_UNIQUE_NAME_TYPE) { - if (strcmp(NVP_NAME(nvp1), NVP_NAME(nvp2)) == 0 && - NVP_TYPE(nvp1) == NVP_TYPE(nvp2)) - match = B_TRUE; - } else { - ASSERT(nvflag == 0 || nvflag & NV_UNIQUE_NAME); - if (strcmp(NVP_NAME(nvp1), NVP_NAME(nvp2)) == 0) - match = B_TRUE; - } - return (match); -} - -static nvpair_t * -nvt_lookup_name_type(nvlist_t *nvl, const char *name, data_type_t type) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - ASSERT(priv != NULL); - - i_nvp_t **tab = priv->nvp_hashtable; - - if (tab == NULL) { - ASSERT3P(priv->nvp_list, ==, NULL); - ASSERT0(priv->nvp_nbuckets); - ASSERT0(priv->nvp_nentries); - return (NULL); - } else { - ASSERT(priv->nvp_nbuckets != 0); - 
} - - uint64_t hash = nvt_hash(name); - uint64_t index = hash & (priv->nvp_nbuckets - 1); - - ASSERT3U(index, <, priv->nvp_nbuckets); - i_nvp_t *entry = tab[index]; - - for (i_nvp_t *e = entry; e != NULL; e = e->nvi_hashtable_next) { - if (strcmp(NVP_NAME(&e->nvi_nvp), name) == 0 && - (type == DATA_TYPE_DONTCARE || - NVP_TYPE(&e->nvi_nvp) == type)) - return (&e->nvi_nvp); - } - return (NULL); -} - -static nvpair_t * -nvt_lookup_name(nvlist_t *nvl, const char *name) -{ - return (nvt_lookup_name_type(nvl, name, DATA_TYPE_DONTCARE)); -} - -static int -nvt_resize(nvpriv_t *priv, uint32_t new_size) -{ - i_nvp_t **tab = priv->nvp_hashtable; - - /* - * Migrate all the entries from the current table - * to a newly-allocated table with the new size by - * re-adjusting the pointers of their entries. - */ - uint32_t size = priv->nvp_nbuckets; - uint32_t new_mask = new_size - 1; - ASSERT(((new_size) & ((new_size) - 1)) == 0); - - i_nvp_t **new_tab = nv_mem_zalloc(priv, new_size * sizeof (i_nvp_t *)); - if (new_tab == NULL) - return (ENOMEM); - - uint32_t nentries = 0; - for (uint32_t i = 0; i < size; i++) { - i_nvp_t *next, *e = tab[i]; - - while (e != NULL) { - next = e->nvi_hashtable_next; - - uint32_t hash = nvt_hash(NVP_NAME(&e->nvi_nvp)); - uint32_t index = hash & new_mask; - - e->nvi_hashtable_next = new_tab[index]; - new_tab[index] = e; - nentries++; - - e = next; - } - tab[i] = NULL; - } - ASSERT3U(nentries, ==, priv->nvp_nentries); - - nvt_tab_free(priv); - - priv->nvp_hashtable = new_tab; - priv->nvp_nbuckets = new_size; - priv->nvp_nentries = nentries; - - return (0); -} - -static boolean_t -nvt_needs_togrow(nvpriv_t *priv) -{ - /* - * Grow only when we have more elements than buckets - * and the # of buckets doesn't overflow. 
- */ - return (priv->nvp_nentries > priv->nvp_nbuckets && - (UINT32_MAX >> 1) >= priv->nvp_nbuckets); -} - -/* - * Allocate a new table that's twice the size of the old one, - * and migrate all the entries from the old one to the new - * one by re-adjusting their pointers. - */ -static int -nvt_grow(nvpriv_t *priv) -{ - uint32_t current_size = priv->nvp_nbuckets; - /* ensure we won't overflow */ - ASSERT3U(UINT32_MAX >> 1, >=, current_size); - return (nvt_resize(priv, current_size << 1)); -} - -static boolean_t -nvt_needs_toshrink(nvpriv_t *priv) -{ - /* - * Shrink only when the # of elements is less than or - * equal to 1/4 the # of buckets. Never shrink less than - * nvlist_hashtable_init_size. - */ - ASSERT3U(priv->nvp_nbuckets, >=, nvlist_hashtable_init_size); - if (priv->nvp_nbuckets == nvlist_hashtable_init_size) - return (B_FALSE); - return (priv->nvp_nentries <= (priv->nvp_nbuckets >> 2)); -} - -/* - * Allocate a new table that's half the size of the old one, - * and migrate all the entries from the old one to the new - * one by re-adjusting their pointers. 
- */ -static int -nvt_shrink(nvpriv_t *priv) -{ - uint32_t current_size = priv->nvp_nbuckets; - /* ensure we won't overflow */ - ASSERT3U(current_size, >=, nvlist_hashtable_init_size); - return (nvt_resize(priv, current_size >> 1)); -} - -static int -nvt_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - - if (nvt_needs_toshrink(priv)) { - int err = nvt_shrink(priv); - if (err != 0) - return (err); - } - i_nvp_t **tab = priv->nvp_hashtable; - - char *name = NVP_NAME(nvp); - uint64_t hash = nvt_hash(name); - uint64_t index = hash & (priv->nvp_nbuckets - 1); - - ASSERT3U(index, <, priv->nvp_nbuckets); - i_nvp_t *bucket = tab[index]; - - for (i_nvp_t *prev = NULL, *e = bucket; - e != NULL; prev = e, e = e->nvi_hashtable_next) { - if (nvt_nvpair_match(&e->nvi_nvp, nvp, nvl->nvl_flag)) { - if (prev != NULL) { - prev->nvi_hashtable_next = - e->nvi_hashtable_next; - } else { - ASSERT3P(e, ==, bucket); - tab[index] = e->nvi_hashtable_next; - } - e->nvi_hashtable_next = NULL; - priv->nvp_nentries--; - break; - } - } - - return (0); -} - -static int -nvt_add_nvpair(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - - /* initialize nvpair table now if it doesn't exist. */ - if (priv->nvp_hashtable == NULL) { - int err = nvt_tab_alloc(priv, nvlist_hashtable_init_size); - if (err != 0) - return (err); - } - - /* - * if we don't allow duplicate entries, make sure to - * unlink any existing entries from the table. 
- */ - if (nvl->nvl_nvflag != 0) { - int err = nvt_remove_nvpair(nvl, nvp); - if (err != 0) - return (err); - } - - if (nvt_needs_togrow(priv)) { - int err = nvt_grow(priv); - if (err != 0) - return (err); - } - i_nvp_t **tab = priv->nvp_hashtable; - - char *name = NVP_NAME(nvp); - uint64_t hash = nvt_hash(name); - uint64_t index = hash & (priv->nvp_nbuckets - 1); - - ASSERT3U(index, <, priv->nvp_nbuckets); - i_nvp_t *bucket = tab[index]; - - /* insert link at the beginning of the bucket */ - i_nvp_t *new_entry = NVPAIR2I_NVP(nvp); - ASSERT3P(new_entry->nvi_hashtable_next, ==, NULL); - new_entry->nvi_hashtable_next = bucket; - tab[index] = new_entry; - - priv->nvp_nentries++; - return (0); -} - -static void -nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv) -{ - nvl->nvl_version = NV_VERSION; - nvl->nvl_nvflag = nvflag & (NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE); - nvl->nvl_priv = (uint64_t)(uintptr_t)priv; - nvl->nvl_flag = 0; - nvl->nvl_pad = 0; -} - -uint_t -nvlist_nvflag(nvlist_t *nvl) -{ - return (nvl->nvl_nvflag); -} - -/* - * nvlist_alloc - Allocate nvlist. - */ -/*ARGSUSED1*/ -int -nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag) -{ -#if defined(_KERNEL) && !defined(_BOOT) - return (nvlist_xalloc(nvlp, nvflag, - (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); -#else - return (nvlist_xalloc(nvlp, nvflag, nv_alloc_nosleep)); -#endif -} - -int -nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva) -{ - nvpriv_t *priv; - - if (nvlp == NULL || nva == NULL) - return (EINVAL); - - if ((priv = nv_priv_alloc(nva)) == NULL) - return (ENOMEM); - - if ((*nvlp = nv_mem_zalloc(priv, - NV_ALIGN(sizeof (nvlist_t)))) == NULL) { - nv_mem_free(priv, priv, sizeof (nvpriv_t)); - return (ENOMEM); - } - - nvlist_init(*nvlp, nvflag, priv); - - return (0); -} - -/* - * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair. 
- */ -static nvpair_t * -nvp_buf_alloc(nvlist_t *nvl, size_t len) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - i_nvp_t *buf; - nvpair_t *nvp; - size_t nvsize; - - /* - * Allocate the buffer - */ - nvsize = len + offsetof(i_nvp_t, nvi_nvp); - - if ((buf = nv_mem_zalloc(priv, nvsize)) == NULL) - return (NULL); - - nvp = &buf->nvi_nvp; - nvp->nvp_size = len; - - return (nvp); -} - -/* - * nvp_buf_free - de-Allocate an i_nvp_t. - */ -static void -nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - size_t nvsize = nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp); - - nv_mem_free(priv, NVPAIR2I_NVP(nvp), nvsize); -} - -/* - * nvp_buf_link - link a new nv pair into the nvlist. - */ -static void -nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - i_nvp_t *curr = NVPAIR2I_NVP(nvp); - - /* Put element at end of nvlist */ - if (priv->nvp_list == NULL) { - priv->nvp_list = priv->nvp_last = curr; - } else { - curr->nvi_prev = priv->nvp_last; - priv->nvp_last->nvi_next = curr; - priv->nvp_last = curr; - } -} - -/* - * nvp_buf_unlink - unlink an removed nvpair out of the nvlist. - */ -static void -nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - i_nvp_t *curr = NVPAIR2I_NVP(nvp); - - /* - * protect nvlist_next_nvpair() against walking on freed memory. 
- */ - if (priv->nvp_curr == curr) - priv->nvp_curr = curr->nvi_next; - - if (curr == priv->nvp_list) - priv->nvp_list = curr->nvi_next; - else - curr->nvi_prev->nvi_next = curr->nvi_next; - - if (curr == priv->nvp_last) - priv->nvp_last = curr->nvi_prev; - else - curr->nvi_next->nvi_prev = curr->nvi_prev; -} - -/* - * take a nvpair type and number of elements and make sure the are valid - */ -static int -i_validate_type_nelem(data_type_t type, uint_t nelem) -{ - switch (type) { - case DATA_TYPE_BOOLEAN: - if (nelem != 0) - return (EINVAL); - break; - case DATA_TYPE_BOOLEAN_VALUE: - case DATA_TYPE_BYTE: - case DATA_TYPE_INT8: - case DATA_TYPE_UINT8: - case DATA_TYPE_INT16: - case DATA_TYPE_UINT16: - case DATA_TYPE_INT32: - case DATA_TYPE_UINT32: - case DATA_TYPE_INT64: - case DATA_TYPE_UINT64: - case DATA_TYPE_STRING: - case DATA_TYPE_HRTIME: - case DATA_TYPE_NVLIST: -#if !defined(_KERNEL) - case DATA_TYPE_DOUBLE: -#endif - if (nelem != 1) - return (EINVAL); - break; - case DATA_TYPE_BOOLEAN_ARRAY: - case DATA_TYPE_BYTE_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - case DATA_TYPE_STRING_ARRAY: - case DATA_TYPE_NVLIST_ARRAY: - /* we allow arrays with 0 elements */ - break; - default: - return (EINVAL); - } - return (0); -} - -/* - * Verify nvp_name_sz and check the name string length. - */ -static int -i_validate_nvpair_name(nvpair_t *nvp) -{ - if ((nvp->nvp_name_sz <= 0) || - (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0))) - return (EFAULT); - - /* verify the name string, make sure its terminated */ - if (NVP_NAME(nvp)[nvp->nvp_name_sz - 1] != '\0') - return (EFAULT); - - return (strlen(NVP_NAME(nvp)) == nvp->nvp_name_sz - 1 ? 
0 : EFAULT); -} - -static int -i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data) -{ - switch (type) { - case DATA_TYPE_BOOLEAN_VALUE: - if (*(boolean_t *)data != B_TRUE && - *(boolean_t *)data != B_FALSE) - return (EINVAL); - break; - case DATA_TYPE_BOOLEAN_ARRAY: { - int i; - - for (i = 0; i < nelem; i++) - if (((boolean_t *)data)[i] != B_TRUE && - ((boolean_t *)data)[i] != B_FALSE) - return (EINVAL); - break; - } - default: - break; - } - - return (0); -} - -/* - * This function takes a pointer to what should be a nvpair and it's size - * and then verifies that all the nvpair fields make sense and can be - * trusted. This function is used when decoding packed nvpairs. - */ -static int -i_validate_nvpair(nvpair_t *nvp) -{ - data_type_t type = NVP_TYPE(nvp); - int size1, size2; - - /* verify nvp_name_sz, check the name string length */ - if (i_validate_nvpair_name(nvp) != 0) - return (EFAULT); - - if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0) - return (EFAULT); - - /* - * verify nvp_type, nvp_value_elem, and also possibly - * verify string values and get the value size. - */ - size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp)); - size1 = nvp->nvp_size - NVP_VALOFF(nvp); - if (size2 < 0 || size1 != NV_ALIGN(size2)) - return (EFAULT); - - return (0); -} - -static int -nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl) -{ - nvpriv_t *priv; - i_nvp_t *curr; - - if ((priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv) == NULL) - return (EINVAL); - - for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { - nvpair_t *nvp = &curr->nvi_nvp; - int err; - - if ((err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp), - NVP_NELEM(nvp), NVP_VALUE(nvp))) != 0) - return (err); - } - - return (0); -} - -/* - * Frees all memory allocated for an nvpair (like embedded lists) with - * the exception of the nvpair buffer itself. 
- */ -static void -nvpair_free(nvpair_t *nvp) -{ - switch (NVP_TYPE(nvp)) { - case DATA_TYPE_NVLIST: - nvlist_free(EMBEDDED_NVL(nvp)); - break; - case DATA_TYPE_NVLIST_ARRAY: { - nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); - int i; - - for (i = 0; i < NVP_NELEM(nvp); i++) - nvlist_free(nvlp[i]); - break; - } - default: - break; - } -} - -/* - * nvlist_free - free an unpacked nvlist - */ -void -nvlist_free(nvlist_t *nvl) -{ - nvpriv_t *priv; - i_nvp_t *curr; - - if (nvl == NULL || - (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return; - - /* - * Unpacked nvlist are linked through i_nvp_t - */ - curr = priv->nvp_list; - while (curr != NULL) { - nvpair_t *nvp = &curr->nvi_nvp; - curr = curr->nvi_next; - - nvpair_free(nvp); - nvp_buf_free(nvl, nvp); - } - - if (!(priv->nvp_stat & NV_STAT_EMBEDDED)) - nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t))); - else - nvl->nvl_priv = 0; - - nvt_tab_free(priv); - nv_mem_free(priv, priv, sizeof (nvpriv_t)); -} - -static int -nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - i_nvp_t *curr; - - if (nvp == NULL) - return (0); - - for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) - if (&curr->nvi_nvp == nvp) - return (1); - - return (0); -} - -/* - * Make a copy of nvlist - */ -/*ARGSUSED1*/ -int -nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag) -{ -#if defined(_KERNEL) && !defined(_BOOT) - return (nvlist_xdup(nvl, nvlp, - (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); -#else - return (nvlist_xdup(nvl, nvlp, nv_alloc_nosleep)); -#endif -} - -int -nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva) -{ - int err; - nvlist_t *ret; - - if (nvl == NULL || nvlp == NULL) - return (EINVAL); - - if ((err = nvlist_xalloc(&ret, nvl->nvl_nvflag, nva)) != 0) - return (err); - - if ((err = nvlist_copy_pairs(nvl, ret)) != 0) - nvlist_free(ret); - else - *nvlp = ret; - - return (err); -} - -/* - * Remove all with matching name - */ -int -nvlist_remove_all(nvlist_t *nvl, const char *name) -{ - int error = ENOENT; - - if (nvl == NULL || name == NULL || nvl->nvl_priv == 0) - return (EINVAL); - - nvpair_t *nvp; - while ((nvp = nvt_lookup_name(nvl, name)) != NULL) { - VERIFY0(nvlist_remove_nvpair(nvl, nvp)); - error = 0; - } - - return (error); -} - -/* - * Remove first one with matching name and type - */ -int -nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) -{ - if (nvl == NULL || name == NULL || nvl->nvl_priv == 0) - return (EINVAL); - - nvpair_t *nvp = nvt_lookup_name_type(nvl, name, type); - if (nvp == NULL) - return (ENOENT); - - return (nvlist_remove_nvpair(nvl, nvp)); -} - -int -nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) -{ - if (nvl == NULL || nvp == NULL) - return (EINVAL); - - int err = nvt_remove_nvpair(nvl, nvp); - if (err != 0) - return (err); - - nvp_buf_unlink(nvl, nvp); - nvpair_free(nvp); - nvp_buf_free(nvl, nvp); - return (0); -} - -/* - * This function calculates the size of an nvpair value. - * - * The data argument controls the behavior in case of the data types - * DATA_TYPE_STRING and - * DATA_TYPE_STRING_ARRAY - * Is data == NULL then the size of the string(s) is excluded. 
- */ -static int -i_get_value_size(data_type_t type, const void *data, uint_t nelem) -{ - uint64_t value_sz; - - if (i_validate_type_nelem(type, nelem) != 0) - return (-1); - - /* Calculate required size for holding value */ - switch (type) { - case DATA_TYPE_BOOLEAN: - value_sz = 0; - break; - case DATA_TYPE_BOOLEAN_VALUE: - value_sz = sizeof (boolean_t); - break; - case DATA_TYPE_BYTE: - value_sz = sizeof (uchar_t); - break; - case DATA_TYPE_INT8: - value_sz = sizeof (int8_t); - break; - case DATA_TYPE_UINT8: - value_sz = sizeof (uint8_t); - break; - case DATA_TYPE_INT16: - value_sz = sizeof (int16_t); - break; - case DATA_TYPE_UINT16: - value_sz = sizeof (uint16_t); - break; - case DATA_TYPE_INT32: - value_sz = sizeof (int32_t); - break; - case DATA_TYPE_UINT32: - value_sz = sizeof (uint32_t); - break; - case DATA_TYPE_INT64: - value_sz = sizeof (int64_t); - break; - case DATA_TYPE_UINT64: - value_sz = sizeof (uint64_t); - break; -#if !defined(_KERNEL) - case DATA_TYPE_DOUBLE: - value_sz = sizeof (double); - break; -#endif - case DATA_TYPE_STRING: - if (data == NULL) - value_sz = 0; - else - value_sz = strlen(data) + 1; - break; - case DATA_TYPE_BOOLEAN_ARRAY: - value_sz = (uint64_t)nelem * sizeof (boolean_t); - break; - case DATA_TYPE_BYTE_ARRAY: - value_sz = (uint64_t)nelem * sizeof (uchar_t); - break; - case DATA_TYPE_INT8_ARRAY: - value_sz = (uint64_t)nelem * sizeof (int8_t); - break; - case DATA_TYPE_UINT8_ARRAY: - value_sz = (uint64_t)nelem * sizeof (uint8_t); - break; - case DATA_TYPE_INT16_ARRAY: - value_sz = (uint64_t)nelem * sizeof (int16_t); - break; - case DATA_TYPE_UINT16_ARRAY: - value_sz = (uint64_t)nelem * sizeof (uint16_t); - break; - case DATA_TYPE_INT32_ARRAY: - value_sz = (uint64_t)nelem * sizeof (int32_t); - break; - case DATA_TYPE_UINT32_ARRAY: - value_sz = (uint64_t)nelem * sizeof (uint32_t); - break; - case DATA_TYPE_INT64_ARRAY: - value_sz = (uint64_t)nelem * sizeof (int64_t); - break; - case DATA_TYPE_UINT64_ARRAY: - value_sz = 
(uint64_t)nelem * sizeof (uint64_t); - break; - case DATA_TYPE_STRING_ARRAY: - value_sz = (uint64_t)nelem * sizeof (uint64_t); - - if (data != NULL) { - char *const *strs = data; - uint_t i; - - /* no alignment requirement for strings */ - for (i = 0; i < nelem; i++) { - if (strs[i] == NULL) - return (-1); - value_sz += strlen(strs[i]) + 1; - } - } - break; - case DATA_TYPE_HRTIME: - value_sz = sizeof (hrtime_t); - break; - case DATA_TYPE_NVLIST: - value_sz = NV_ALIGN(sizeof (nvlist_t)); - break; - case DATA_TYPE_NVLIST_ARRAY: - value_sz = (uint64_t)nelem * sizeof (uint64_t) + - (uint64_t)nelem * NV_ALIGN(sizeof (nvlist_t)); - break; - default: - return (-1); - } - - return (value_sz > INT32_MAX ? -1 : (int)value_sz); -} - -static int -nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl) -{ - nvpriv_t *priv; - int err; - - if ((priv = nv_priv_alloc_embedded((nvpriv_t *)(uintptr_t) - nvl->nvl_priv)) == NULL) - return (ENOMEM); - - nvlist_init(emb_nvl, onvl->nvl_nvflag, priv); - - if ((err = nvlist_copy_pairs(onvl, emb_nvl)) != 0) { - nvlist_free(emb_nvl); - emb_nvl->nvl_priv = 0; - } - - return (err); -} - -/* - * nvlist_add_common - Add new <name,value> pair to nvlist - */ -static int -nvlist_add_common(nvlist_t *nvl, const char *name, - data_type_t type, uint_t nelem, const void *data) -{ - nvpair_t *nvp; - uint_t i; - - int nvp_sz, name_sz, value_sz; - int err = 0; - - if (name == NULL || nvl == NULL || nvl->nvl_priv == 0) - return (EINVAL); - - if (nelem != 0 && data == NULL) - return (EINVAL); - - /* - * Verify type and nelem and get the value size. - * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY - * is the size of the string(s) included. 
- */ - if ((value_sz = i_get_value_size(type, data, nelem)) < 0) - return (EINVAL); - - if (i_validate_nvpair_value(type, nelem, data) != 0) - return (EINVAL); - - /* - * If we're adding an nvlist or nvlist array, ensure that we are not - * adding the input nvlist to itself, which would cause recursion, - * and ensure that no NULL nvlist pointers are present. - */ - switch (type) { - case DATA_TYPE_NVLIST: - if (data == nvl || data == NULL) - return (EINVAL); - break; - case DATA_TYPE_NVLIST_ARRAY: { - nvlist_t **onvlp = (nvlist_t **)data; - for (i = 0; i < nelem; i++) { - if (onvlp[i] == nvl || onvlp[i] == NULL) - return (EINVAL); - } - break; - } - default: - break; - } - - /* calculate sizes of the nvpair elements and the nvpair itself */ - name_sz = strlen(name) + 1; - if (name_sz >= 1ULL << (sizeof (nvp->nvp_name_sz) * 8 - 1)) - return (EINVAL); - - nvp_sz = NVP_SIZE_CALC(name_sz, value_sz); - - if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL) - return (ENOMEM); - - ASSERT(nvp->nvp_size == nvp_sz); - nvp->nvp_name_sz = name_sz; - nvp->nvp_value_elem = nelem; - nvp->nvp_type = type; - bcopy(name, NVP_NAME(nvp), name_sz); - - switch (type) { - case DATA_TYPE_BOOLEAN: - break; - case DATA_TYPE_STRING_ARRAY: { - char *const *strs = data; - char *buf = NVP_VALUE(nvp); - char **cstrs = (void *)buf; - - /* skip pre-allocated space for pointer array */ - buf += nelem * sizeof (uint64_t); - for (i = 0; i < nelem; i++) { - int slen = strlen(strs[i]) + 1; - bcopy(strs[i], buf, slen); - cstrs[i] = buf; - buf += slen; - } - break; - } - case DATA_TYPE_NVLIST: { - nvlist_t *nnvl = EMBEDDED_NVL(nvp); - nvlist_t *onvl = (nvlist_t *)data; - - if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) { - nvp_buf_free(nvl, nvp); - return (err); - } - break; - } - case DATA_TYPE_NVLIST_ARRAY: { - nvlist_t **onvlp = (nvlist_t **)data; - nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); - nvlist_t *embedded = (nvlist_t *) - ((uintptr_t)nvlp + nelem * sizeof (uint64_t)); - - for (i = 0; i < 
nelem; i++) { - if ((err = nvlist_copy_embedded(nvl, - onvlp[i], embedded)) != 0) { - /* - * Free any successfully created lists - */ - nvpair_free(nvp); - nvp_buf_free(nvl, nvp); - return (err); - } - - nvlp[i] = embedded++; - } - break; - } - default: - bcopy(data, NVP_VALUE(nvp), value_sz); - } - - /* if unique name, remove before add */ - if (nvl->nvl_nvflag & NV_UNIQUE_NAME) - (void) nvlist_remove_all(nvl, name); - else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE) - (void) nvlist_remove(nvl, name, type); - - err = nvt_add_nvpair(nvl, nvp); - if (err != 0) { - nvpair_free(nvp); - nvp_buf_free(nvl, nvp); - return (err); - } - nvp_buf_link(nvl, nvp); - - return (0); -} - -int -nvlist_add_boolean(nvlist_t *nvl, const char *name) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL)); -} - -int -nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val)); -} - -int -nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val)); -} - -int -nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val)); -} - -int -nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val)); -} - -int -nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val)); -} - -int -nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val)); -} - -int -nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val)); -} - -int -nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val)); -} - -int 
-nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val)); -} - -int -nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val)); -} - -#if !defined(_KERNEL) -int -nvlist_add_double(nvlist_t *nvl, const char *name, double val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_DOUBLE, 1, &val)); -} -#endif - -int -nvlist_add_string(nvlist_t *nvl, const char *name, const char *val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val)); -} - -int -nvlist_add_boolean_array(nvlist_t *nvl, const char *name, - boolean_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); -} - -int -nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); -} - -int -nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); -} - -int -nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); -} - -int -nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); -} - -int -nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); -} - -int -nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); -} - -int -nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); -} - -int -nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, 
uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); -} - -int -nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); -} - -int -nvlist_add_string_array(nvlist_t *nvl, const char *name, - char *const *a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); -} - -int -nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val)); -} - -int -nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); -} - -int -nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n) -{ - return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); -} - -/* reading name-value pairs */ -nvpair_t * -nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv; - i_nvp_t *curr; - - if (nvl == NULL || - (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return (NULL); - - curr = NVPAIR2I_NVP(nvp); - - /* - * Ensure that nvp is a valid nvpair on this nvlist. - * NB: nvp_curr is used only as a hint so that we don't always - * have to walk the list to determine if nvp is still on the list. - */ - if (nvp == NULL) - curr = priv->nvp_list; - else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) - curr = curr->nvi_next; - else - curr = NULL; - - priv->nvp_curr = curr; - - return (curr != NULL ? 
&curr->nvi_nvp : NULL); -} - -nvpair_t * -nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp) -{ - nvpriv_t *priv; - i_nvp_t *curr; - - if (nvl == NULL || - (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return (NULL); - - curr = NVPAIR2I_NVP(nvp); - - if (nvp == NULL) - curr = priv->nvp_last; - else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) - curr = curr->nvi_prev; - else - curr = NULL; - - priv->nvp_curr = curr; - - return (curr != NULL ? &curr->nvi_nvp : NULL); -} - -boolean_t -nvlist_empty(nvlist_t *nvl) -{ - nvpriv_t *priv; - - if (nvl == NULL || - (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return (B_TRUE); - - return (priv->nvp_list == NULL); -} - -char * -nvpair_name(nvpair_t *nvp) -{ - return (NVP_NAME(nvp)); -} - -data_type_t -nvpair_type(nvpair_t *nvp) -{ - return (NVP_TYPE(nvp)); -} - -int -nvpair_type_is_array(nvpair_t *nvp) -{ - data_type_t type = NVP_TYPE(nvp); - - if ((type == DATA_TYPE_BYTE_ARRAY) || - (type == DATA_TYPE_INT8_ARRAY) || - (type == DATA_TYPE_UINT8_ARRAY) || - (type == DATA_TYPE_INT16_ARRAY) || - (type == DATA_TYPE_UINT16_ARRAY) || - (type == DATA_TYPE_INT32_ARRAY) || - (type == DATA_TYPE_UINT32_ARRAY) || - (type == DATA_TYPE_INT64_ARRAY) || - (type == DATA_TYPE_UINT64_ARRAY) || - (type == DATA_TYPE_BOOLEAN_ARRAY) || - (type == DATA_TYPE_STRING_ARRAY) || - (type == DATA_TYPE_NVLIST_ARRAY)) - return (1); - return (0); - -} - -static int -nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data) -{ - if (nvp == NULL || nvpair_type(nvp) != type) - return (EINVAL); - - /* - * For non-array types, we copy the data. - * For array types (including string), we set a pointer. 
- */ - switch (type) { - case DATA_TYPE_BOOLEAN: - if (nelem != NULL) - *nelem = 0; - break; - - case DATA_TYPE_BOOLEAN_VALUE: - case DATA_TYPE_BYTE: - case DATA_TYPE_INT8: - case DATA_TYPE_UINT8: - case DATA_TYPE_INT16: - case DATA_TYPE_UINT16: - case DATA_TYPE_INT32: - case DATA_TYPE_UINT32: - case DATA_TYPE_INT64: - case DATA_TYPE_UINT64: - case DATA_TYPE_HRTIME: -#if !defined(_KERNEL) - case DATA_TYPE_DOUBLE: -#endif - if (data == NULL) - return (EINVAL); - bcopy(NVP_VALUE(nvp), data, - (size_t)i_get_value_size(type, NULL, 1)); - if (nelem != NULL) - *nelem = 1; - break; - - case DATA_TYPE_NVLIST: - case DATA_TYPE_STRING: - if (data == NULL) - return (EINVAL); - *(void **)data = (void *)NVP_VALUE(nvp); - if (nelem != NULL) - *nelem = 1; - break; - - case DATA_TYPE_BOOLEAN_ARRAY: - case DATA_TYPE_BYTE_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - case DATA_TYPE_STRING_ARRAY: - case DATA_TYPE_NVLIST_ARRAY: - if (nelem == NULL || data == NULL) - return (EINVAL); - if ((*nelem = NVP_NELEM(nvp)) != 0) - *(void **)data = (void *)NVP_VALUE(nvp); - else - *(void **)data = NULL; - break; - - default: - return (ENOTSUP); - } - - return (0); -} - -static int -nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type, - uint_t *nelem, void *data) -{ - if (name == NULL || nvl == NULL || nvl->nvl_priv == 0) - return (EINVAL); - - if (!(nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE))) - return (ENOTSUP); - - nvpair_t *nvp = nvt_lookup_name_type(nvl, name, type); - if (nvp == NULL) - return (ENOENT); - - return (nvpair_value_common(nvp, type, nelem, data)); -} - -int -nvlist_lookup_boolean(nvlist_t *nvl, const char *name) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL)); -} - -int -nvlist_lookup_boolean_value(nvlist_t *nvl, 
const char *name, boolean_t *val) -{ - return (nvlist_lookup_common(nvl, name, - DATA_TYPE_BOOLEAN_VALUE, NULL, val)); -} - -int -nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val)); -} - -int -nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val)); -} - -int -nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val)); -} - -int -nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val)); -} - -int -nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val)); -} - -int -nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val)); -} - -int -nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val)); -} - -int -nvlist_lookup_int64(nvlist_t *nvl, const char *name, int64_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val)); -} - -int -nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val)); -} - -#if !defined(_KERNEL) -int -nvlist_lookup_double(nvlist_t *nvl, const char *name, double *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_DOUBLE, NULL, val)); -} -#endif - -int -nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING, NULL, val)); -} - -int -nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val)); -} - 
-int -nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name, - boolean_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, - DATA_TYPE_BOOLEAN_ARRAY, n, a)); -} - -int -nvlist_lookup_byte_array(nvlist_t *nvl, const char *name, - uchar_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); -} - -int -nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); -} - -int -nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name, - uint8_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); -} - -int -nvlist_lookup_int16_array(nvlist_t *nvl, const char *name, - int16_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); -} - -int -nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name, - uint16_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); -} - -int -nvlist_lookup_int32_array(nvlist_t *nvl, const char *name, - int32_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); -} - -int -nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name, - uint32_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); -} - -int -nvlist_lookup_int64_array(nvlist_t *nvl, const char *name, - int64_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); -} - -int -nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name, - uint64_t **a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); -} - -int -nvlist_lookup_string_array(nvlist_t *nvl, const char *name, - char ***a, uint_t *n) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); -} - -int -nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name, - nvlist_t ***a, uint_t *n) -{ 
- return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); -} - -int -nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val) -{ - return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val)); -} - -int -nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...) -{ - va_list ap; - char *name; - int noentok = (flag & NV_FLAG_NOENTOK ? 1 : 0); - int ret = 0; - - va_start(ap, flag); - while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { - data_type_t type; - void *val; - uint_t *nelem; - - switch (type = va_arg(ap, data_type_t)) { - case DATA_TYPE_BOOLEAN: - ret = nvlist_lookup_common(nvl, name, type, NULL, NULL); - break; - - case DATA_TYPE_BOOLEAN_VALUE: - case DATA_TYPE_BYTE: - case DATA_TYPE_INT8: - case DATA_TYPE_UINT8: - case DATA_TYPE_INT16: - case DATA_TYPE_UINT16: - case DATA_TYPE_INT32: - case DATA_TYPE_UINT32: - case DATA_TYPE_INT64: - case DATA_TYPE_UINT64: - case DATA_TYPE_HRTIME: - case DATA_TYPE_STRING: - case DATA_TYPE_NVLIST: -#if !defined(_KERNEL) - case DATA_TYPE_DOUBLE: -#endif - val = va_arg(ap, void *); - ret = nvlist_lookup_common(nvl, name, type, NULL, val); - break; - - case DATA_TYPE_BYTE_ARRAY: - case DATA_TYPE_BOOLEAN_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - case DATA_TYPE_STRING_ARRAY: - case DATA_TYPE_NVLIST_ARRAY: - val = va_arg(ap, void *); - nelem = va_arg(ap, uint_t *); - ret = nvlist_lookup_common(nvl, name, type, nelem, val); - break; - - default: - ret = EINVAL; - } - - if (ret == ENOENT && noentok) - ret = 0; - } - va_end(ap); - - return (ret); -} - -/* - * Find the 'name'ed nvpair in the nvlist 'nvl'. If 'name' found, the function - * returns zero and a pointer to the matching nvpair is returned in '*ret' - * (given 'ret' is non-NULL). 
If 'sep' is specified then 'name' will penitrate - * multiple levels of embedded nvlists, with 'sep' as the separator. As an - * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or - * "a.d[3].e[1]". This matches the C syntax for array embed (for convience, - * code also supports "a.d[3]e[1]" syntax). - * - * If 'ip' is non-NULL and the last name component is an array, return the - * value of the "...[index]" array index in *ip. For an array reference that - * is not indexed, *ip will be returned as -1. If there is a syntax error in - * 'name', and 'ep' is non-NULL then *ep will be set to point to the location - * inside the 'name' string where the syntax error was detected. - */ -static int -nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep, - nvpair_t **ret, int *ip, char **ep) -{ - nvpair_t *nvp; - const char *np; - char *sepp; - char *idxp, *idxep; - nvlist_t **nva; - long idx; - int n; - - if (ip) - *ip = -1; /* not indexed */ - if (ep) - *ep = NULL; - - if ((nvl == NULL) || (name == NULL)) - return (EINVAL); - - sepp = NULL; - idx = 0; - /* step through components of name */ - for (np = name; np && *np; np = sepp) { - /* ensure unique names */ - if (!(nvl->nvl_nvflag & NV_UNIQUE_NAME)) - return (ENOTSUP); - - /* skip white space */ - skip_whitespace(np); - if (*np == 0) - break; - - /* set 'sepp' to end of current component 'np' */ - if (sep) - sepp = strchr(np, sep); - else - sepp = NULL; - - /* find start of next "[ index ]..." */ - idxp = strchr(np, '['); - - /* if sepp comes first, set idxp to NULL */ - if (sepp && idxp && (sepp < idxp)) - idxp = NULL; - - /* - * At this point 'idxp' is set if there is an index - * expected for the current component. 
- */ - if (idxp) { - /* set 'n' to length of current 'np' name component */ - n = idxp++ - np; - - /* keep sepp up to date for *ep use as we advance */ - skip_whitespace(idxp); - sepp = idxp; - - /* determine the index value */ -#if defined(_KERNEL) && !defined(_BOOT) - if (ddi_strtol(idxp, &idxep, 0, &idx)) - goto fail; -#else - idx = strtol(idxp, &idxep, 0); -#endif - if (idxep == idxp) - goto fail; - - /* keep sepp up to date for *ep use as we advance */ - sepp = idxep; - - /* skip white space index value and check for ']' */ - skip_whitespace(sepp); - if (*sepp++ != ']') - goto fail; - - /* for embedded arrays, support C syntax: "a[1].b" */ - skip_whitespace(sepp); - if (sep && (*sepp == sep)) - sepp++; - } else if (sepp) { - n = sepp++ - np; - } else { - n = strlen(np); - } - - /* trim trailing whitespace by reducing length of 'np' */ - if (n == 0) - goto fail; - for (n--; (np[n] == ' ') || (np[n] == '\t'); n--) - ; - n++; - - /* skip whitespace, and set sepp to NULL if complete */ - if (sepp) { - skip_whitespace(sepp); - if (*sepp == 0) - sepp = NULL; - } - - /* - * At this point: - * o 'n' is the length of current 'np' component. - * o 'idxp' is set if there was an index, and value 'idx'. - * o 'sepp' is set to the beginning of the next component, - * and set to NULL if we have no more components. - * - * Search for nvpair with matching component name. - */ - for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; - nvp = nvlist_next_nvpair(nvl, nvp)) { - - /* continue if no match on name */ - if (strncmp(np, nvpair_name(nvp), n) || - (strlen(nvpair_name(nvp)) != n)) - continue; - - /* if indexed, verify type is array oriented */ - if (idxp && !nvpair_type_is_array(nvp)) - goto fail; - - /* - * Full match found, return nvp and idx if this - * was the last component. 
- */ - if (sepp == NULL) { - if (ret) - *ret = nvp; - if (ip && idxp) - *ip = (int)idx; /* return index */ - return (0); /* found */ - } - - /* - * More components: current match must be - * of DATA_TYPE_NVLIST or DATA_TYPE_NVLIST_ARRAY - * to support going deeper. - */ - if (nvpair_type(nvp) == DATA_TYPE_NVLIST) { - nvl = EMBEDDED_NVL(nvp); - break; - } else if (nvpair_type(nvp) == DATA_TYPE_NVLIST_ARRAY) { - (void) nvpair_value_nvlist_array(nvp, - &nva, (uint_t *)&n); - if ((n < 0) || (idx >= n)) - goto fail; - nvl = nva[idx]; - break; - } - - /* type does not support more levels */ - goto fail; - } - if (nvp == NULL) - goto fail; /* 'name' not found */ - - /* search for match of next component in embedded 'nvl' list */ - } - -fail: if (ep && sepp) - *ep = sepp; - return (EINVAL); -} - -/* - * Return pointer to nvpair with specified 'name'. - */ -int -nvlist_lookup_nvpair(nvlist_t *nvl, const char *name, nvpair_t **ret) -{ - return (nvlist_lookup_nvpair_ei_sep(nvl, name, 0, ret, NULL, NULL)); -} - -/* - * Determine if named nvpair exists in nvlist (use embedded separator of '.' - * and return array index). See nvlist_lookup_nvpair_ei_sep for more detailed - * description. 
- */ -int nvlist_lookup_nvpair_embedded_index(nvlist_t *nvl, - const char *name, nvpair_t **ret, int *ip, char **ep) -{ - return (nvlist_lookup_nvpair_ei_sep(nvl, name, '.', ret, ip, ep)); -} - -boolean_t -nvlist_exists(nvlist_t *nvl, const char *name) -{ - nvpriv_t *priv; - nvpair_t *nvp; - i_nvp_t *curr; - - if (name == NULL || nvl == NULL || - (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return (B_FALSE); - - for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { - nvp = &curr->nvi_nvp; - - if (strcmp(name, NVP_NAME(nvp)) == 0) - return (B_TRUE); - } - - return (B_FALSE); -} - -int -nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val)); -} - -int -nvpair_value_byte(nvpair_t *nvp, uchar_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val)); -} - -int -nvpair_value_int8(nvpair_t *nvp, int8_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val)); -} - -int -nvpair_value_uint8(nvpair_t *nvp, uint8_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val)); -} - -int -nvpair_value_int16(nvpair_t *nvp, int16_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val)); -} - -int -nvpair_value_uint16(nvpair_t *nvp, uint16_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val)); -} - -int -nvpair_value_int32(nvpair_t *nvp, int32_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val)); -} - -int -nvpair_value_uint32(nvpair_t *nvp, uint32_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val)); -} - -int -nvpair_value_int64(nvpair_t *nvp, int64_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val)); -} - -int -nvpair_value_uint64(nvpair_t *nvp, uint64_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val)); -} - -#if !defined(_KERNEL) -int -nvpair_value_double(nvpair_t *nvp, double *val) -{ - return 
(nvpair_value_common(nvp, DATA_TYPE_DOUBLE, NULL, val)); -} -#endif - -int -nvpair_value_string(nvpair_t *nvp, char **val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val)); -} - -int -nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val)); -} - -int -nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val)); -} - -int -nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val)); -} - -int -nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val)); -} - -int -nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val)); -} - -int -nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val)); -} - -int -nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val)); -} - -int -nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, val)); -} - -int -nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val)); -} - -int -nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val)); -} - -int -nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val)); -} - -int -nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem) -{ - return 
(nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val)); -} - -int -nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem) -{ - return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val)); -} - -int -nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val) -{ - return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val)); -} - -/* - * Add specified pair to the list. - */ -int -nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp) -{ - if (nvl == NULL || nvp == NULL) - return (EINVAL); - - return (nvlist_add_common(nvl, NVP_NAME(nvp), NVP_TYPE(nvp), - NVP_NELEM(nvp), NVP_VALUE(nvp))); -} - -/* - * Merge the supplied nvlists and put the result in dst. - * The merged list will contain all names specified in both lists, - * the values are taken from nvl in the case of duplicates. - * Return 0 on success. - */ -/*ARGSUSED*/ -int -nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag) -{ - if (nvl == NULL || dst == NULL) - return (EINVAL); - - if (dst != nvl) - return (nvlist_copy_pairs(nvl, dst)); - - return (0); -} - -/* - * Encoding related routines - */ -#define NVS_OP_ENCODE 0 -#define NVS_OP_DECODE 1 -#define NVS_OP_GETSIZE 2 - -typedef struct nvs_ops nvs_ops_t; - -typedef struct { - int nvs_op; - const nvs_ops_t *nvs_ops; - void *nvs_private; - nvpriv_t *nvs_priv; - int nvs_recursion; -} nvstream_t; - -/* - * nvs operations are: - * - nvs_nvlist - * encoding / decoding of a nvlist header (nvlist_t) - * calculates the size used for header and end detection - * - * - nvs_nvpair - * responsible for the first part of encoding / decoding of an nvpair - * calculates the decoded size of an nvpair - * - * - nvs_nvp_op - * second part of encoding / decoding of an nvpair - * - * - nvs_nvp_size - * calculates the encoding size of an nvpair - * - * - nvs_nvl_fini - * encodes the end detection mark (zeros). 
- */ -struct nvs_ops { - int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *); - int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *); - int (*nvs_nvp_op)(nvstream_t *, nvpair_t *); - int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *); - int (*nvs_nvl_fini)(nvstream_t *); -}; - -typedef struct { - char nvh_encoding; /* nvs encoding method */ - char nvh_endian; /* nvs endian */ - char nvh_reserved1; /* reserved for future use */ - char nvh_reserved2; /* reserved for future use */ -} nvs_header_t; - -static int -nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - i_nvp_t *curr; - - /* - * Walk nvpair in list and encode each nvpair - */ - for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) - if (nvs->nvs_ops->nvs_nvpair(nvs, &curr->nvi_nvp, NULL) != 0) - return (EFAULT); - - return (nvs->nvs_ops->nvs_nvl_fini(nvs)); -} - -static int -nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl) -{ - nvpair_t *nvp; - size_t nvsize; - int err; - - /* - * Get decoded size of next pair in stream, alloc - * memory for nvpair_t, then decode the nvpair - */ - while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) { - if (nvsize == 0) /* end of list */ - break; - - /* make sure len makes sense */ - if (nvsize < NVP_SIZE_CALC(1, 0)) - return (EFAULT); - - if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL) - return (ENOMEM); - - if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) { - nvp_buf_free(nvl, nvp); - return (err); - } - - if (i_validate_nvpair(nvp) != 0) { - nvpair_free(nvp); - nvp_buf_free(nvl, nvp); - return (EFAULT); - } - - err = nvt_add_nvpair(nvl, nvp); - if (err != 0) { - nvpair_free(nvp); - nvp_buf_free(nvl, nvp); - return (err); - } - nvp_buf_link(nvl, nvp); - } - return (err); -} - -static int -nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) -{ - nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; - i_nvp_t *curr; - uint64_t nvsize = *buflen; - size_t size; - - /* 
- * Get encoded size of nvpairs in nvlist - */ - for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { - if (nvs->nvs_ops->nvs_nvp_size(nvs, &curr->nvi_nvp, &size) != 0) - return (EINVAL); - - if ((nvsize += size) > INT32_MAX) - return (EINVAL); - } - - *buflen = nvsize; - return (0); -} - -static int -nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) -{ - int err; - - if (nvl->nvl_priv == 0) - return (EFAULT); - - /* - * Perform the operation, starting with header, then each nvpair - */ - if ((err = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen)) != 0) - return (err); - - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - err = nvs_encode_pairs(nvs, nvl); - break; - - case NVS_OP_DECODE: - err = nvs_decode_pairs(nvs, nvl); - break; - - case NVS_OP_GETSIZE: - err = nvs_getsize_pairs(nvs, nvl, buflen); - break; - - default: - err = EINVAL; - } - - return (err); -} - -static int -nvs_embedded(nvstream_t *nvs, nvlist_t *embedded) -{ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: { - int err; - - if (nvs->nvs_recursion >= nvpair_max_recursion) - return (EINVAL); - nvs->nvs_recursion++; - err = nvs_operation(nvs, embedded, NULL); - nvs->nvs_recursion--; - return (err); - } - case NVS_OP_DECODE: { - nvpriv_t *priv; - int err; - - if (embedded->nvl_version != NV_VERSION) - return (ENOTSUP); - - if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL) - return (ENOMEM); - - nvlist_init(embedded, embedded->nvl_nvflag, priv); - - if (nvs->nvs_recursion >= nvpair_max_recursion) { - nvlist_free(embedded); - return (EINVAL); - } - nvs->nvs_recursion++; - if ((err = nvs_operation(nvs, embedded, NULL)) != 0) - nvlist_free(embedded); - nvs->nvs_recursion--; - return (err); - } - default: - break; - } - - return (EINVAL); -} - -static int -nvs_embedded_nvl_array(nvstream_t *nvs, nvpair_t *nvp, size_t *size) -{ - size_t nelem = NVP_NELEM(nvp); - nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); - int i; - - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - for (i = 0; i < 
nelem; i++) - if (nvs_embedded(nvs, nvlp[i]) != 0) - return (EFAULT); - break; - - case NVS_OP_DECODE: { - size_t len = nelem * sizeof (uint64_t); - nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len); - - bzero(nvlp, len); /* don't trust packed data */ - for (i = 0; i < nelem; i++) { - if (nvs_embedded(nvs, embedded) != 0) { - nvpair_free(nvp); - return (EFAULT); - } - - nvlp[i] = embedded++; - } - break; - } - case NVS_OP_GETSIZE: { - uint64_t nvsize = 0; - - for (i = 0; i < nelem; i++) { - size_t nvp_sz = 0; - - if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0) - return (EINVAL); - - if ((nvsize += nvp_sz) > INT32_MAX) - return (EINVAL); - } - - *size = nvsize; - break; - } - default: - return (EINVAL); - } - - return (0); -} - -static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *); -static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *); - -/* - * Common routine for nvlist operations: - * encode, decode, getsize (encoded size). - */ -static int -nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, - int nvs_op) -{ - int err = 0; - nvstream_t nvs; - int nvl_endian; -#if BYTE_ORDER == _LITTLE_ENDIAN - int host_endian = 1; -#else - int host_endian = 0; -#endif /* _LITTLE_ENDIAN */ - nvs_header_t *nvh = (void *)buf; - - if (buflen == NULL || nvl == NULL || - (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) - return (EINVAL); - - nvs.nvs_op = nvs_op; - nvs.nvs_recursion = 0; - - /* - * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and - * a buffer is allocated. The first 4 bytes in the buffer are - * used for encoding method and host endian. 
- */ - switch (nvs_op) { - case NVS_OP_ENCODE: - if (buf == NULL || *buflen < sizeof (nvs_header_t)) - return (EINVAL); - - nvh->nvh_encoding = encoding; - nvh->nvh_endian = nvl_endian = host_endian; - nvh->nvh_reserved1 = 0; - nvh->nvh_reserved2 = 0; - break; - - case NVS_OP_DECODE: - if (buf == NULL || *buflen < sizeof (nvs_header_t)) - return (EINVAL); - - /* get method of encoding from first byte */ - encoding = nvh->nvh_encoding; - nvl_endian = nvh->nvh_endian; - break; - - case NVS_OP_GETSIZE: - nvl_endian = host_endian; - - /* - * add the size for encoding - */ - *buflen = sizeof (nvs_header_t); - break; - - default: - return (ENOTSUP); - } - - /* - * Create an nvstream with proper encoding method - */ - switch (encoding) { - case NV_ENCODE_NATIVE: - /* - * check endianness, in case we are unpacking - * from a file - */ - if (nvl_endian != host_endian) - return (ENOTSUP); - err = nvs_native(&nvs, nvl, buf, buflen); - break; - case NV_ENCODE_XDR: - err = nvs_xdr(&nvs, nvl, buf, buflen); - break; - default: - err = ENOTSUP; - break; - } - - return (err); -} - -int -nvlist_size(nvlist_t *nvl, size_t *size, int encoding) -{ - return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE)); -} - -/* - * Pack nvlist into contiguous memory - */ -/*ARGSUSED1*/ -int -nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, - int kmflag) -{ -#if defined(_KERNEL) && !defined(_BOOT) - return (nvlist_xpack(nvl, bufp, buflen, encoding, - (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); -#else - return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep)); -#endif -} - -int -nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, - nv_alloc_t *nva) -{ - nvpriv_t nvpriv; - size_t alloc_size; - char *buf; - int err; - - if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL) - return (EINVAL); - - if (*bufp != NULL) - return (nvlist_common(nvl, *bufp, buflen, encoding, - NVS_OP_ENCODE)); - - /* - * Here is a difficult situation: - * 1. The nvlist has fixed allocator properties. - * All other nvlist routines (like nvlist_add_*, ...) use - * these properties. - * 2. When using nvlist_pack() the user can specify their own - * allocator properties (e.g. by using KM_NOSLEEP). - * - * We use the user specified properties (2). A clearer solution - * will be to remove the kmflag from nvlist_pack(), but we will - * not change the interface. - */ - nv_priv_init(&nvpriv, nva, 0); - - if ((err = nvlist_size(nvl, &alloc_size, encoding))) - return (err); - - if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL) - return (ENOMEM); - - if ((err = nvlist_common(nvl, buf, &alloc_size, encoding, - NVS_OP_ENCODE)) != 0) { - nv_mem_free(&nvpriv, buf, alloc_size); - } else { - *buflen = alloc_size; - *bufp = buf; - } - - return (err); -} - -/* - * Unpack buf into an nvlist_t - */ -/*ARGSUSED1*/ -int -nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag) -{ -#if defined(_KERNEL) && !defined(_BOOT) - return (nvlist_xunpack(buf, buflen, nvlp, - (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); -#else - return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep)); -#endif -} - -int -nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva) -{ - nvlist_t *nvl; - int err; - - if (nvlp == NULL) - return (EINVAL); - - if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0) - return (err); - - if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0) - nvlist_free(nvl); - else - *nvlp = nvl; - - return (err); -} - -/* - * Native encoding functions - */ -typedef struct { - /* - * This structure is used when decoding a packed nvpair in - * the native format. n_base points to a buffer containing the - * packed nvpair. n_end is a pointer to the end of the buffer. - * (n_end actually points to the first byte past the end of the - * buffer.) n_curr is a pointer that lies between n_base and n_end. - * It points to the current data that we are decoding. - * The amount of data left in the buffer is equal to n_end - n_curr. - * n_flag is used to recognize a packed embedded list. 
- */ - caddr_t n_base; - caddr_t n_end; - caddr_t n_curr; - uint_t n_flag; -} nvs_native_t; - -static int -nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf, - size_t buflen) -{ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - case NVS_OP_DECODE: - nvs->nvs_private = native; - native->n_curr = native->n_base = buf; - native->n_end = buf + buflen; - native->n_flag = 0; - return (0); - - case NVS_OP_GETSIZE: - nvs->nvs_private = native; - native->n_curr = native->n_base = native->n_end = NULL; - native->n_flag = 0; - return (0); - default: - return (EINVAL); - } -} - -/*ARGSUSED*/ -static void -nvs_native_destroy(nvstream_t *nvs) -{ -} - -static int -native_cp(nvstream_t *nvs, void *buf, size_t size) -{ - nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; - - if (native->n_curr + size > native->n_end) - return (EFAULT); - - /* - * The bcopy() below eliminates alignment requirement - * on the buffer (stream) and is preferred over direct access. - */ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - bcopy(buf, native->n_curr, size); - break; - case NVS_OP_DECODE: - bcopy(native->n_curr, buf, size); - break; - default: - return (EINVAL); - } - - native->n_curr += size; - return (0); -} - -/* - * operate on nvlist_t header - */ -static int -nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) -{ - nvs_native_t *native = nvs->nvs_private; - - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - case NVS_OP_DECODE: - if (native->n_flag) - return (0); /* packed embedded list */ - - native->n_flag = 1; - - /* copy version and nvflag of the nvlist_t */ - if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 || - native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0) - return (EFAULT); - - return (0); - - case NVS_OP_GETSIZE: - /* - * if calculate for packed embedded list - * 4 for end of the embedded list - * else - * 2 * sizeof (int32_t) for nvl_version and nvl_nvflag - * and 4 for end of the entire list - */ - if (native->n_flag) { - 
*size += 4; - } else { - native->n_flag = 1; - *size += 2 * sizeof (int32_t) + 4; - } - - return (0); - - default: - return (EINVAL); - } -} - -static int -nvs_native_nvl_fini(nvstream_t *nvs) -{ - if (nvs->nvs_op == NVS_OP_ENCODE) { - nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; - /* - * Add 4 zero bytes at end of nvlist. They are used - * for end detection by the decode routine. - */ - if (native->n_curr + sizeof (int) > native->n_end) - return (EFAULT); - - bzero(native->n_curr, sizeof (int)); - native->n_curr += sizeof (int); - } - - return (0); -} - -static int -nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp) -{ - if (nvs->nvs_op == NVS_OP_ENCODE) { - nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; - char *packed = (void *) - (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); - /* - * Null out the pointer that is meaningless in the packed - * structure. The address may not be aligned, so we have - * to use bzero. - */ - bzero(packed + offsetof(nvlist_t, nvl_priv), - sizeof(((nvlist_t *)NULL)->nvl_priv)); - } - - return (nvs_embedded(nvs, EMBEDDED_NVL(nvp))); -} - -static int -nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp) -{ - if (nvs->nvs_op == NVS_OP_ENCODE) { - nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; - char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp); - size_t len = NVP_NELEM(nvp) * sizeof (uint64_t); - int i; - /* - * Null out pointers that are meaningless in the packed - * structure. The addresses may not be aligned, so we have - * to use bzero. - */ - bzero(value, len); - - value += len; - for (i = 0; i < NVP_NELEM(nvp); i++) { - /* - * Null out the pointer that is meaningless in the - * packed structure. The address may not be aligned, - * so we have to use bzero. 
- */ - bzero(value + offsetof(nvlist_t, nvl_priv), - sizeof(((nvlist_t *)NULL)->nvl_priv)); - value += sizeof(nvlist_t); - } - } - - return (nvs_embedded_nvl_array(nvs, nvp, NULL)); -} - -static void -nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp) -{ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: { - nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; - uint64_t *strp = (void *) - (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); - /* - * Null out pointers that are meaningless in the packed - * structure. The addresses may not be aligned, so we have - * to use bzero. - */ - bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t)); - break; - } - case NVS_OP_DECODE: { - char **strp = (void *)NVP_VALUE(nvp); - char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t)); - int i; - - for (i = 0; i < NVP_NELEM(nvp); i++) { - strp[i] = buf; - buf += strlen(buf) + 1; - } - break; - } - } -} - -static int -nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp) -{ - data_type_t type; - int value_sz; - int ret = 0; - - /* - * We do the initial bcopy of the data before we look at - * the nvpair type, because when we're decoding, we won't - * have the correct values for the pair until we do the bcopy. - */ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - case NVS_OP_DECODE: - if (native_cp(nvs, nvp, nvp->nvp_size) != 0) - return (EFAULT); - break; - default: - return (EINVAL); - } - - /* verify nvp_name_sz, check the name string length */ - if (i_validate_nvpair_name(nvp) != 0) - return (EFAULT); - - type = NVP_TYPE(nvp); - - /* - * Verify type and nelem and get the value size. - * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY - * is the size of the string(s) excluded. 
- */ - if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0) - return (EFAULT); - - if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size) - return (EFAULT); - - switch (type) { - case DATA_TYPE_NVLIST: - ret = nvpair_native_embedded(nvs, nvp); - break; - case DATA_TYPE_NVLIST_ARRAY: - ret = nvpair_native_embedded_array(nvs, nvp); - break; - case DATA_TYPE_STRING_ARRAY: - nvpair_native_string_array(nvs, nvp); - break; - default: - break; - } - - return (ret); -} - -static int -nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) -{ - uint64_t nvp_sz = nvp->nvp_size; - - switch (NVP_TYPE(nvp)) { - case DATA_TYPE_NVLIST: { - size_t nvsize = 0; - - if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0) - return (EINVAL); - - nvp_sz += nvsize; - break; - } - case DATA_TYPE_NVLIST_ARRAY: { - size_t nvsize; - - if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0) - return (EINVAL); - - nvp_sz += nvsize; - break; - } - default: - break; - } - - if (nvp_sz > INT32_MAX) - return (EINVAL); - - *size = nvp_sz; - - return (0); -} - -static int -nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) -{ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - return (nvs_native_nvp_op(nvs, nvp)); - - case NVS_OP_DECODE: { - nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; - int32_t decode_len; - - /* try to read the size value from the stream */ - if (native->n_curr + sizeof (int32_t) > native->n_end) - return (EFAULT); - bcopy(native->n_curr, &decode_len, sizeof (int32_t)); - - /* sanity check the size value */ - if (decode_len < 0 || - decode_len > native->n_end - native->n_curr) - return (EFAULT); - - *size = decode_len; - - /* - * If at the end of the stream then move the cursor - * forward, otherwise nvpair_native_op() will read - * the entire nvpair at the same cursor position. 
- */ - if (*size == 0) - native->n_curr += sizeof (int32_t); - break; - } - - default: - return (EINVAL); - } - - return (0); -} - -static const nvs_ops_t nvs_native_ops = { - nvs_native_nvlist, - nvs_native_nvpair, - nvs_native_nvp_op, - nvs_native_nvp_size, - nvs_native_nvl_fini -}; - -static int -nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) -{ - nvs_native_t native; - int err; - - nvs->nvs_ops = &nvs_native_ops; - - if ((err = nvs_native_create(nvs, &native, buf + sizeof (nvs_header_t), - *buflen - sizeof (nvs_header_t))) != 0) - return (err); - - err = nvs_operation(nvs, nvl, buflen); - - nvs_native_destroy(nvs); - - return (err); -} - -/* - * XDR encoding functions - * - * An xdr packed nvlist is encoded as: - * - * - encoding methode and host endian (4 bytes) - * - nvl_version (4 bytes) - * - nvl_nvflag (4 bytes) - * - * - encoded nvpairs, the format of one xdr encoded nvpair is: - * - encoded size of the nvpair (4 bytes) - * - decoded size of the nvpair (4 bytes) - * - name string, (4 + sizeof(NV_ALIGN4(string)) - * a string is coded as size (4 bytes) and data - * - data type (4 bytes) - * - number of elements in the nvpair (4 bytes) - * - data - * - * - 2 zero's for end of the entire list (8 bytes) - */ -static int -nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen) -{ - /* xdr data must be 4 byte aligned */ - if ((ulong_t)buf % 4 != 0) - return (EFAULT); - - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE); - nvs->nvs_private = xdr; - return (0); - case NVS_OP_DECODE: - xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE); - nvs->nvs_private = xdr; - return (0); - case NVS_OP_GETSIZE: - nvs->nvs_private = NULL; - return (0); - default: - return (EINVAL); - } -} - -static void -nvs_xdr_destroy(nvstream_t *nvs) -{ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - case NVS_OP_DECODE: - xdr_destroy((XDR *)nvs->nvs_private); - break; - default: - break; - } -} - -static 
int -nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) -{ - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: - case NVS_OP_DECODE: { - XDR *xdr = nvs->nvs_private; - - if (!xdr_int(xdr, &nvl->nvl_version) || - !xdr_u_int(xdr, &nvl->nvl_nvflag)) - return (EFAULT); - break; - } - case NVS_OP_GETSIZE: { - /* - * 2 * 4 for nvl_version + nvl_nvflag - * and 8 for end of the entire list - */ - *size += 2 * 4 + 8; - break; - } - default: - return (EINVAL); - } - return (0); -} - -static int -nvs_xdr_nvl_fini(nvstream_t *nvs) -{ - if (nvs->nvs_op == NVS_OP_ENCODE) { - XDR *xdr = nvs->nvs_private; - int zero = 0; - - if (!xdr_int(xdr, &zero) || !xdr_int(xdr, &zero)) - return (EFAULT); - } - - return (0); -} - -/* - * The format of xdr encoded nvpair is: - * encode_size, decode_size, name string, data type, nelem, data - */ -static int -nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp) -{ - data_type_t type; - char *buf; - char *buf_end = (char *)nvp + nvp->nvp_size; - int value_sz; - uint_t nelem, buflen; - bool_t ret = FALSE; - XDR *xdr = nvs->nvs_private; - - ASSERT(xdr != NULL && nvp != NULL); - - /* name string */ - if ((buf = NVP_NAME(nvp)) >= buf_end) - return (EFAULT); - buflen = buf_end - buf; - - if (!xdr_string(xdr, &buf, buflen - 1)) - return (EFAULT); - nvp->nvp_name_sz = strlen(buf) + 1; - - /* type and nelem */ - if (!xdr_int(xdr, (int *)&nvp->nvp_type) || - !xdr_int(xdr, &nvp->nvp_value_elem)) - return (EFAULT); - - type = NVP_TYPE(nvp); - nelem = nvp->nvp_value_elem; - - /* - * Verify type and nelem and get the value size. - * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY - * is the size of the string(s) excluded. 
- */ - if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0) - return (EFAULT); - - /* if there is no data to extract then return */ - if (nelem == 0) - return (0); - - /* value */ - if ((buf = NVP_VALUE(nvp)) >= buf_end) - return (EFAULT); - buflen = buf_end - buf; - - if (buflen < value_sz) - return (EFAULT); - - switch (type) { - case DATA_TYPE_NVLIST: - if (nvs_embedded(nvs, (void *)buf) == 0) - return (0); - break; - - case DATA_TYPE_NVLIST_ARRAY: - if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0) - return (0); - break; - - case DATA_TYPE_BOOLEAN: - ret = TRUE; - break; - - case DATA_TYPE_BYTE: - case DATA_TYPE_INT8: - case DATA_TYPE_UINT8: - ret = xdr_char(xdr, buf); - break; - - case DATA_TYPE_INT16: - ret = xdr_short(xdr, (void *)buf); - break; - - case DATA_TYPE_UINT16: - ret = xdr_u_short(xdr, (void *)buf); - break; - - case DATA_TYPE_BOOLEAN_VALUE: - case DATA_TYPE_INT32: - ret = xdr_int(xdr, (void *)buf); - break; - - case DATA_TYPE_UINT32: - ret = xdr_u_int(xdr, (void *)buf); - break; - - case DATA_TYPE_INT64: - ret = xdr_longlong_t(xdr, (void *)buf); - break; - - case DATA_TYPE_UINT64: - ret = xdr_u_longlong_t(xdr, (void *)buf); - break; - - case DATA_TYPE_HRTIME: - /* - * NOTE: must expose the definition of hrtime_t here - */ - ret = xdr_longlong_t(xdr, (void *)buf); - break; -#if !defined(_KERNEL) - case DATA_TYPE_DOUBLE: - ret = xdr_double(xdr, (void *)buf); - break; -#endif - case DATA_TYPE_STRING: - ret = xdr_string(xdr, &buf, buflen - 1); - break; - - case DATA_TYPE_BYTE_ARRAY: - ret = xdr_opaque(xdr, buf, nelem); - break; - - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t), - (xdrproc_t)xdr_char); - break; - - case DATA_TYPE_INT16_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t), - sizeof (int16_t), (xdrproc_t)xdr_short); - break; - - case DATA_TYPE_UINT16_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t), - sizeof (uint16_t), 
(xdrproc_t)xdr_u_short); - break; - - case DATA_TYPE_BOOLEAN_ARRAY: - case DATA_TYPE_INT32_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t), - sizeof (int32_t), (xdrproc_t)xdr_int); - break; - - case DATA_TYPE_UINT32_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t), - sizeof (uint32_t), (xdrproc_t)xdr_u_int); - break; - - case DATA_TYPE_INT64_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t), - sizeof (int64_t), (xdrproc_t)xdr_longlong_t); - break; - - case DATA_TYPE_UINT64_ARRAY: - ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t), - sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t); - break; - - case DATA_TYPE_STRING_ARRAY: { - size_t len = nelem * sizeof (uint64_t); - char **strp = (void *)buf; - int i; - - if (nvs->nvs_op == NVS_OP_DECODE) - bzero(buf, len); /* don't trust packed data */ - - for (i = 0; i < nelem; i++) { - if (buflen <= len) - return (EFAULT); - - buf += len; - buflen -= len; - - if (xdr_string(xdr, &buf, buflen - 1) != TRUE) - return (EFAULT); - - if (nvs->nvs_op == NVS_OP_DECODE) - strp[i] = buf; - len = strlen(buf) + 1; - } - ret = TRUE; - break; - } - default: - break; - } - - return (ret == TRUE ? 
0 : EFAULT); -} - -static int -nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) -{ - data_type_t type = NVP_TYPE(nvp); - /* - * encode_size + decode_size + name string size + data type + nelem - * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) - */ - uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4; - - switch (type) { - case DATA_TYPE_BOOLEAN: - break; - - case DATA_TYPE_BOOLEAN_VALUE: - case DATA_TYPE_BYTE: - case DATA_TYPE_INT8: - case DATA_TYPE_UINT8: - case DATA_TYPE_INT16: - case DATA_TYPE_UINT16: - case DATA_TYPE_INT32: - case DATA_TYPE_UINT32: - nvp_sz += 4; /* 4 is the minimum xdr unit */ - break; - - case DATA_TYPE_INT64: - case DATA_TYPE_UINT64: - case DATA_TYPE_HRTIME: -#if !defined(_KERNEL) - case DATA_TYPE_DOUBLE: -#endif - nvp_sz += 8; - break; - - case DATA_TYPE_STRING: - nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp))); - break; - - case DATA_TYPE_BYTE_ARRAY: - nvp_sz += NV_ALIGN4(NVP_NELEM(nvp)); - break; - - case DATA_TYPE_BOOLEAN_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp); - break; - - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp); - break; - - case DATA_TYPE_STRING_ARRAY: { - int i; - char **strs = (void *)NVP_VALUE(nvp); - - for (i = 0; i < NVP_NELEM(nvp); i++) - nvp_sz += 4 + NV_ALIGN4(strlen(strs[i])); - - break; - } - - case DATA_TYPE_NVLIST: - case DATA_TYPE_NVLIST_ARRAY: { - size_t nvsize = 0; - int old_nvs_op = nvs->nvs_op; - int err; - - nvs->nvs_op = NVS_OP_GETSIZE; - if (type == DATA_TYPE_NVLIST) - err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize); - else - err = nvs_embedded_nvl_array(nvs, nvp, &nvsize); - nvs->nvs_op = old_nvs_op; - - if (err != 0) - return (EINVAL); - - nvp_sz += nvsize; - break; - } - - default: - return 
(EINVAL); - } - - if (nvp_sz > INT32_MAX) - return (EINVAL); - - *size = nvp_sz; - - return (0); -} - - -/* - * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates - * the largest nvpair that could be encoded in the buffer. - * - * See comments above nvpair_xdr_op() for the format of xdr encoding. - * The size of a xdr packed nvpair without any data is 5 words. - * - * Using the size of the data directly as an estimate would be ok - * in all cases except one. If the data type is of DATA_TYPE_STRING_ARRAY - * then the actual nvpair has space for an array of pointers to index - * the strings. These pointers are not encoded into the packed xdr buffer. - * - * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are - * of length 0, then each string is endcoded in xdr format as a single word. - * Therefore when expanded to an nvpair there will be 2.25 word used for - * each string. (a int64_t allocated for pointer usage, and a single char - * for the null termination.) - * - * This is the calculation performed by the NVS_XDR_MAX_LEN macro. - */ -#define NVS_XDR_HDR_LEN ((size_t)(5 * 4)) -#define NVS_XDR_DATA_LEN(y) (((size_t)(y) <= NVS_XDR_HDR_LEN) ? 
\ - 0 : ((size_t)(y) - NVS_XDR_HDR_LEN)) -#define NVS_XDR_MAX_LEN(x) (NVP_SIZE_CALC(1, 0) + \ - (NVS_XDR_DATA_LEN(x) * 2) + \ - NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4))) - -static int -nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) -{ - XDR *xdr = nvs->nvs_private; - int32_t encode_len, decode_len; - - switch (nvs->nvs_op) { - case NVS_OP_ENCODE: { - size_t nvsize; - - if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0) - return (EFAULT); - - decode_len = nvp->nvp_size; - encode_len = nvsize; - if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) - return (EFAULT); - - return (nvs_xdr_nvp_op(nvs, nvp)); - } - case NVS_OP_DECODE: { - struct xdr_bytesrec bytesrec; - - /* get the encode and decode size */ - if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) - return (EFAULT); - *size = decode_len; - - /* are we at the end of the stream? */ - if (*size == 0) - return (0); - - /* sanity check the size parameter */ - if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec)) - return (EFAULT); - - if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail)) - return (EFAULT); - break; - } - - default: - return (EINVAL); - } - return (0); -} - -static const struct nvs_ops nvs_xdr_ops = { - nvs_xdr_nvlist, - nvs_xdr_nvpair, - nvs_xdr_nvp_op, - nvs_xdr_nvp_size, - nvs_xdr_nvl_fini -}; - -static int -nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) -{ - XDR xdr; - int err; - - nvs->nvs_ops = &nvs_xdr_ops; - - if ((err = nvs_xdr_create(nvs, &xdr, buf + sizeof (nvs_header_t), - *buflen - sizeof (nvs_header_t))) != 0) - return (err); - - err = nvs_operation(nvs, nvl, buflen); - - nvs_xdr_destroy(nvs); - - return (err); -} diff --git a/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair_alloc_fixed.c b/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair_alloc_fixed.c deleted file mode 100644 index 620171e4ca4e..000000000000 --- a/sys/cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair_alloc_fixed.c +++ /dev/null @@ -1,118 
+0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/nvpair.h> -#include <sys/sysmacros.h> -#if defined(_KERNEL) && !defined(_BOOT) -#include <sys/varargs.h> -#else -#include <stdarg.h> -#include <strings.h> -#endif - -/* - * This allocator is very simple. - * - it uses a pre-allocated buffer for memory allocations. - * - it does _not_ free memory in the pre-allocated buffer. - * - * The reason for the selected implemention is simplicity. - * This allocator is designed for the usage in interrupt context when - * the caller may not wait for free memory. - */ - -/* pre-allocated buffer for memory allocations */ -typedef struct nvbuf { - uintptr_t nvb_buf; /* address of pre-allocated buffer */ - uintptr_t nvb_lim; /* limit address in the buffer */ - uintptr_t nvb_cur; /* current address in the buffer */ -} nvbuf_t; - -/* - * Initialize the pre-allocated buffer allocator. 
The caller needs to supply - * - * buf address of pre-allocated buffer - * bufsz size of pre-allocated buffer - * - * nv_fixed_init() calculates the remaining members of nvbuf_t. - */ -static int -nv_fixed_init(nv_alloc_t *nva, va_list valist) -{ - uintptr_t base = va_arg(valist, uintptr_t); - uintptr_t lim = base + va_arg(valist, size_t); - nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t)); - - if (base == 0 || (uintptr_t)&nvb[1] > lim) - return (EINVAL); - - nvb->nvb_buf = (uintptr_t)&nvb[0]; - nvb->nvb_cur = (uintptr_t)&nvb[1]; - nvb->nvb_lim = lim; - nva->nva_arg = nvb; - - return (0); -} - -static void * -nv_fixed_alloc(nv_alloc_t *nva, size_t size) -{ - nvbuf_t *nvb = nva->nva_arg; - uintptr_t new = nvb->nvb_cur; - - if (size == 0 || new + size > nvb->nvb_lim) - return (NULL); - - nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t)); - - return ((void *)new); -} - -/*ARGSUSED*/ -static void -nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size) -{ - /* don't free memory in the pre-allocated buffer */ -} - -static void -nv_fixed_reset(nv_alloc_t *nva) -{ - nvbuf_t *nvb = nva->nva_arg; - - nvb->nvb_cur = (uintptr_t)&nvb[1]; -} - -const nv_alloc_ops_t nv_fixed_ops_def = { - nv_fixed_init, /* nv_ao_init() */ - NULL, /* nv_ao_fini() */ - nv_fixed_alloc, /* nv_ao_alloc() */ - nv_fixed_free, /* nv_ao_free() */ - nv_fixed_reset /* nv_ao_reset() */ -}; - -const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def; diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c deleted file mode 100644 index ba79eeaaefea..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c +++ /dev/null @@ -1,310 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. - * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright (c) 2017, Intel Corporation. - */ - -#ifdef _KERNEL -#include <sys/systm.h> -#else -#include <errno.h> -#include <string.h> -#endif -#include <sys/debug.h> -#include <sys/fs/zfs.h> -#include <sys/types.h> -#include "zfeature_common.h" - -/* - * Set to disable all feature checks while opening pools, allowing pools with - * unsupported features to be opened. Set for testing only. - */ -boolean_t zfeature_checks_disable = B_FALSE; - -zfeature_info_t spa_feature_table[SPA_FEATURES]; - -/* - * Valid characters for feature guids. This list is mainly for aesthetic - * purposes and could be expanded in the future. There are different allowed - * characters in the guids reverse dns portion (before the colon) and its - * short name (after the colon). - */ -static int -valid_char(char c, boolean_t after_colon) -{ - return ((c >= 'a' && c <= 'z') || - (c >= '0' && c <= '9') || - (after_colon && c == '_') || - (!after_colon && (c == '.' 
|| c == '-'))); -} - -/* - * Every feature guid must contain exactly one colon which separates a reverse - * dns organization name from the feature's "short" name (e.g. - * "com.company:feature_name"). - */ -boolean_t -zfeature_is_valid_guid(const char *name) -{ - int i; - boolean_t has_colon = B_FALSE; - - i = 0; - while (name[i] != '\0') { - char c = name[i++]; - if (c == ':') { - if (has_colon) - return (B_FALSE); - has_colon = B_TRUE; - continue; - } - if (!valid_char(c, has_colon)) - return (B_FALSE); - } - - return (has_colon); -} - -boolean_t -zfeature_is_supported(const char *guid) -{ - if (zfeature_checks_disable) - return (B_TRUE); - - for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t *feature = &spa_feature_table[i]; - if (strcmp(guid, feature->fi_guid) == 0) - return (B_TRUE); - } - return (B_FALSE); -} - -int -zfeature_lookup_name(const char *name, spa_feature_t *res) -{ - for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { - zfeature_info_t *feature = &spa_feature_table[i]; - if (strcmp(name, feature->fi_uname) == 0) { - if (res != NULL) - *res = i; - return (0); - } - } - - return (ENOENT); -} - -boolean_t -zfeature_depends_on(spa_feature_t fid, spa_feature_t check) -{ - zfeature_info_t *feature = &spa_feature_table[fid]; - - for (int i = 0; feature->fi_depends[i] != SPA_FEATURE_NONE; i++) { - if (feature->fi_depends[i] == check) - return (B_TRUE); - } - return (B_FALSE); -} - -static void -zfeature_register(spa_feature_t fid, const char *guid, const char *name, - const char *desc, zfeature_flags_t flags, const spa_feature_t *deps) -{ - zfeature_info_t *feature = &spa_feature_table[fid]; - static spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; - - ASSERT(name != NULL); - ASSERT(desc != NULL); - ASSERT((flags & ZFEATURE_FLAG_READONLY_COMPAT) == 0 || - (flags & ZFEATURE_FLAG_MOS) == 0); - ASSERT3U(fid, <, SPA_FEATURES); - ASSERT(zfeature_is_valid_guid(guid)); - - if (deps == NULL) - deps = nodeps; - - feature->fi_feature = fid; - 
feature->fi_guid = guid; - feature->fi_uname = name; - feature->fi_desc = desc; - feature->fi_flags = flags; - feature->fi_depends = deps; -} - -void -zpool_feature_init(void) -{ - zfeature_register(SPA_FEATURE_ASYNC_DESTROY, - "com.delphix:async_destroy", "async_destroy", - "Destroy filesystems asynchronously.", - ZFEATURE_FLAG_READONLY_COMPAT, NULL); - - zfeature_register(SPA_FEATURE_EMPTY_BPOBJ, - "com.delphix:empty_bpobj", "empty_bpobj", - "Snapshots use less space.", - ZFEATURE_FLAG_READONLY_COMPAT, NULL); - - zfeature_register(SPA_FEATURE_LZ4_COMPRESS, - "org.illumos:lz4_compress", "lz4_compress", - "LZ4 compression algorithm support.", - ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, NULL); - - zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, - "com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump", - "Crash dumps to multiple vdev pools.", - 0, NULL); - - zfeature_register(SPA_FEATURE_SPACEMAP_HISTOGRAM, - "com.delphix:spacemap_histogram", "spacemap_histogram", - "Spacemaps maintain space histograms.", - ZFEATURE_FLAG_READONLY_COMPAT, NULL); - - zfeature_register(SPA_FEATURE_ENABLED_TXG, - "com.delphix:enabled_txg", "enabled_txg", - "Record txg at which a feature is enabled", - ZFEATURE_FLAG_READONLY_COMPAT, NULL); - - static spa_feature_t hole_birth_deps[] = { SPA_FEATURE_ENABLED_TXG, - SPA_FEATURE_NONE }; - zfeature_register(SPA_FEATURE_HOLE_BIRTH, - "com.delphix:hole_birth", "hole_birth", - "Retain hole birth txg for more precise zfs send", - ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, - hole_birth_deps); - - zfeature_register(SPA_FEATURE_EXTENSIBLE_DATASET, - "com.delphix:extensible_dataset", "extensible_dataset", - "Enhanced dataset functionality, used by other features.", - 0, NULL); - - static const spa_feature_t bookmarks_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_BOOKMARKS, - "com.delphix:bookmarks", "bookmarks", - "\"zfs bookmark\" command", - ZFEATURE_FLAG_READONLY_COMPAT, 
bookmarks_deps); - - static const spa_feature_t filesystem_limits_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_FS_SS_LIMIT, - "com.joyent:filesystem_limits", "filesystem_limits", - "Filesystem and snapshot limits.", - ZFEATURE_FLAG_READONLY_COMPAT, filesystem_limits_deps); - - zfeature_register(SPA_FEATURE_EMBEDDED_DATA, - "com.delphix:embedded_data", "embedded_data", - "Blocks which compress very well use even less space.", - ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, - NULL); - - zfeature_register(SPA_FEATURE_POOL_CHECKPOINT, - "com.delphix:zpool_checkpoint", "zpool_checkpoint", - "Pool state can be checkpointed, allowing rewind later.", - ZFEATURE_FLAG_READONLY_COMPAT, NULL); - - zfeature_register(SPA_FEATURE_SPACEMAP_V2, - "com.delphix:spacemap_v2", "spacemap_v2", - "Space maps representing large segments are more efficient.", - ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, - NULL); - - static const spa_feature_t large_blocks_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_LARGE_BLOCKS, - "org.open-zfs:large_blocks", "large_blocks", - "Support for blocks larger than 128KB.", - ZFEATURE_FLAG_PER_DATASET, large_blocks_deps); - - { - static const spa_feature_t large_dnode_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_LARGE_DNODE, - "org.zfsonlinux:large_dnode", "large_dnode", - "Variable on-disk size of dnodes.", - ZFEATURE_FLAG_PER_DATASET, large_dnode_deps); - } - - static const spa_feature_t sha512_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_SHA512, - "org.illumos:sha512", "sha512", - "SHA-512/256 hash algorithm.", - ZFEATURE_FLAG_PER_DATASET, sha512_deps); - - static const spa_feature_t skein_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_SKEIN, - 
"org.illumos:skein", "skein", - "Skein hash algorithm.", - ZFEATURE_FLAG_PER_DATASET, skein_deps); - -#ifdef illumos - static const spa_feature_t edonr_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_EDONR, - "org.illumos:edonr", "edonr", - "Edon-R hash algorithm.", - ZFEATURE_FLAG_PER_DATASET, edonr_deps); -#endif - - zfeature_register(SPA_FEATURE_DEVICE_REMOVAL, - "com.delphix:device_removal", "device_removal", - "Top-level vdevs can be removed, reducing logical pool size.", - ZFEATURE_FLAG_MOS, NULL); - - static const spa_feature_t obsolete_counts_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_DEVICE_REMOVAL, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_OBSOLETE_COUNTS, - "com.delphix:obsolete_counts", "obsolete_counts", - "Reduce memory used by removed devices when their blocks are " - "freed or remapped.", - ZFEATURE_FLAG_READONLY_COMPAT, obsolete_counts_deps); - - { - zfeature_register(SPA_FEATURE_ALLOCATION_CLASSES, - "org.zfsonlinux:allocation_classes", "allocation_classes", - "Support for separate allocation classes.", - ZFEATURE_FLAG_READONLY_COMPAT, NULL); - } -} diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h deleted file mode 100644 index d23a4e226e2d..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. - * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - * Copyright (c) 2017, Intel Corporation. - */ - -#ifndef _ZFEATURE_COMMON_H -#define _ZFEATURE_COMMON_H - -#include <sys/fs/zfs.h> -#include <sys/types.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct zfeature_info; - -typedef enum spa_feature { - SPA_FEATURE_NONE = -1, - SPA_FEATURE_ASYNC_DESTROY, - SPA_FEATURE_EMPTY_BPOBJ, - SPA_FEATURE_LZ4_COMPRESS, - SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, - SPA_FEATURE_SPACEMAP_HISTOGRAM, - SPA_FEATURE_ENABLED_TXG, - SPA_FEATURE_HOLE_BIRTH, - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_EMBEDDED_DATA, - SPA_FEATURE_BOOKMARKS, - SPA_FEATURE_FS_SS_LIMIT, - SPA_FEATURE_LARGE_BLOCKS, - SPA_FEATURE_LARGE_DNODE, - SPA_FEATURE_SHA512, - SPA_FEATURE_SKEIN, -#ifdef illumos - SPA_FEATURE_EDONR, -#endif - SPA_FEATURE_DEVICE_REMOVAL, - SPA_FEATURE_OBSOLETE_COUNTS, - SPA_FEATURE_POOL_CHECKPOINT, - SPA_FEATURE_SPACEMAP_V2, - SPA_FEATURE_ALLOCATION_CLASSES, - SPA_FEATURES -} spa_feature_t; - -#define SPA_FEATURE_DISABLED (-1ULL) - -typedef enum zfeature_flags { - /* Can open pool readonly even if this feature is not supported. */ - ZFEATURE_FLAG_READONLY_COMPAT = (1 << 0), - /* Is this feature necessary to read the MOS? */ - ZFEATURE_FLAG_MOS = (1 << 1), - /* Activate this feature at the same time it is enabled. */ - ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2), - /* Each dataset has a field set if it has ever used this feature. 
*/ - ZFEATURE_FLAG_PER_DATASET = (1 << 3) -} zfeature_flags_t; - -typedef struct zfeature_info { - spa_feature_t fi_feature; - const char *fi_uname; /* User-facing feature name */ - const char *fi_guid; /* On-disk feature identifier */ - const char *fi_desc; /* Feature description */ - zfeature_flags_t fi_flags; - /* array of dependencies, terminated by SPA_FEATURE_NONE */ - const spa_feature_t *fi_depends; -} zfeature_info_t; - -typedef int (zfeature_func_t)(zfeature_info_t *, void *); - -#define ZFS_FEATURE_DEBUG - -extern zfeature_info_t spa_feature_table[SPA_FEATURES]; - -extern boolean_t zfeature_is_valid_guid(const char *); - -extern boolean_t zfeature_is_supported(const char *); -extern int zfeature_lookup_name(const char *, spa_feature_t *); -extern boolean_t zfeature_depends_on(spa_feature_t, spa_feature_t); - -extern void zpool_feature_init(void); - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFEATURE_COMMON_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c deleted file mode 100644 index f18d82b507b2..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - */ - -/* - * This file is intended for functions that ought to be common between user - * land (libzfs) and the kernel. When many common routines need to be shared - * then a separate file should to be created. - */ - -#if defined(_KERNEL) -#include <sys/systm.h> -#else -#include <string.h> -#endif - -#include <sys/types.h> -#include <sys/fs/zfs.h> -#include <sys/nvpair.h> -#include "zfs_comutil.h" - -/* - * Are there allocatable vdevs? - */ -boolean_t -zfs_allocatable_devs(nvlist_t *nv) -{ - uint64_t is_log; - uint_t c; - nvlist_t **child; - uint_t children; - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) { - return (B_FALSE); - } - for (c = 0; c < children; c++) { - is_log = 0; - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - if (!is_log) - return (B_TRUE); - } - return (B_FALSE); -} - -void -zpool_get_load_policy(nvlist_t *nvl, zpool_load_policy_t *zlpp) -{ - nvlist_t *policy; - nvpair_t *elem; - char *nm; - - /* Defaults */ - zlpp->zlp_rewind = ZPOOL_NO_REWIND; - zlpp->zlp_maxmeta = 0; - zlpp->zlp_maxdata = UINT64_MAX; - zlpp->zlp_txg = UINT64_MAX; - - if (nvl == NULL) - return; - - elem = NULL; - while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { - nm = nvpair_name(elem); - if (strcmp(nm, ZPOOL_LOAD_POLICY) == 0) { - if (nvpair_value_nvlist(elem, &policy) == 0) - zpool_get_load_policy(policy, zlpp); - return; - } else if (strcmp(nm, ZPOOL_LOAD_REWIND_POLICY) == 0) { - if (nvpair_value_uint32(elem, &zlpp->zlp_rewind) == 0) - if (zlpp->zlp_rewind & ~ZPOOL_REWIND_POLICIES) - zlpp->zlp_rewind = ZPOOL_NO_REWIND; 
- } else if (strcmp(nm, ZPOOL_LOAD_REQUEST_TXG) == 0) { - (void) nvpair_value_uint64(elem, &zlpp->zlp_txg); - } else if (strcmp(nm, ZPOOL_LOAD_META_THRESH) == 0) { - (void) nvpair_value_uint64(elem, &zlpp->zlp_maxmeta); - } else if (strcmp(nm, ZPOOL_LOAD_DATA_THRESH) == 0) { - (void) nvpair_value_uint64(elem, &zlpp->zlp_maxdata); - } - } - if (zlpp->zlp_rewind == 0) - zlpp->zlp_rewind = ZPOOL_NO_REWIND; -} - -typedef struct zfs_version_spa_map { - int version_zpl; - int version_spa; -} zfs_version_spa_map_t; - -/* - * Keep this table in monotonically increasing version number order. - */ -static zfs_version_spa_map_t zfs_version_table[] = { - {ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL}, - {ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL}, - {ZPL_VERSION_FUID, SPA_VERSION_FUID}, - {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE}, - {ZPL_VERSION_SA, SPA_VERSION_SA}, - {0, 0} -}; - -/* - * Return the max zpl version for a corresponding spa version - * -1 is returned if no mapping exists. - */ -int -zfs_zpl_version_map(int spa_version) -{ - int i; - int version = -1; - - for (i = 0; zfs_version_table[i].version_spa; i++) { - if (spa_version >= zfs_version_table[i].version_spa) - version = zfs_version_table[i].version_zpl; - } - - return (version); -} - -/* - * Return the min spa version for a corresponding spa version - * -1 is returned if no mapping exists. - */ -int -zfs_spa_version_map(int zpl_version) -{ - int i; - int version = -1; - - for (i = 0; zfs_version_table[i].version_zpl; i++) { - if (zfs_version_table[i].version_zpl >= zpl_version) - return (zfs_version_table[i].version_spa); - } - - return (version); -} - -/* - * This is the table of legacy internal event names; it should not be modified. - * The internal events are now stored in the history log as strings. 
- */ -const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = { - "invalid event", - "pool create", - "vdev add", - "pool remove", - "pool destroy", - "pool export", - "pool import", - "vdev attach", - "vdev replace", - "vdev detach", - "vdev online", - "vdev offline", - "vdev upgrade", - "pool clear", - "pool scrub", - "pool property set", - "create", - "clone", - "destroy", - "destroy_begin_sync", - "inherit", - "property set", - "quota set", - "permission update", - "permission remove", - "permission who remove", - "promote", - "receive", - "rename", - "reservation set", - "replay_inc_sync", - "replay_full_sync", - "rollback", - "snapshot", - "filesystem version upgrade", - "refquota set", - "refreservation set", - "pool scrub done", - "user hold", - "user release", - "pool split", -}; diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h deleted file mode 100644 index 1c828e41e29f..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. 
All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright 2019 Joyent, Inc. - */ - -#ifndef _ZFS_COMUTIL_H -#define _ZFS_COMUTIL_H - -#include <sys/fs/zfs.h> -#include <sys/types.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* Needed for ZoL errno usage in MMP kernel and user code */ -#define EREMOTEIO EREMOTE - -extern boolean_t zfs_allocatable_devs(nvlist_t *); -extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *); - -extern int zfs_zpl_version_map(int spa_version); -extern int zfs_spa_version_map(int zpl_version); -#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41 -extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS]; - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFS_COMUTIL_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c deleted file mode 100644 index a3383f4ccf2d..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2010 Nexenta Systems, Inc. 
All rights reserved. - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. - * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> - */ - -#include <sys/zfs_context.h> - -#if defined(_KERNEL) -#include <sys/systm.h> -#include <sys/sunddi.h> -#include <sys/ctype.h> -#else -#include <stdio.h> -#include <unistd.h> -#include <strings.h> -#include <libnvpair.h> -#include <ctype.h> -#endif -#include <sys/dsl_deleg.h> -#include "zfs_prop.h" -#include "zfs_deleg.h" -#include "zfs_namecheck.h" - -zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { - {ZFS_DELEG_PERM_ALLOW}, - {ZFS_DELEG_PERM_BOOKMARK}, - {ZFS_DELEG_PERM_CLONE}, - {ZFS_DELEG_PERM_CREATE}, - {ZFS_DELEG_PERM_DESTROY}, - {ZFS_DELEG_PERM_DIFF}, - {ZFS_DELEG_PERM_MOUNT}, - {ZFS_DELEG_PERM_PROMOTE}, - {ZFS_DELEG_PERM_RECEIVE}, - {ZFS_DELEG_PERM_REMAP}, - {ZFS_DELEG_PERM_RENAME}, - {ZFS_DELEG_PERM_ROLLBACK}, - {ZFS_DELEG_PERM_SNAPSHOT}, - {ZFS_DELEG_PERM_SHARE}, - {ZFS_DELEG_PERM_SEND}, - {ZFS_DELEG_PERM_USERPROP}, - {ZFS_DELEG_PERM_USERQUOTA}, - {ZFS_DELEG_PERM_GROUPQUOTA}, - {ZFS_DELEG_PERM_USERUSED}, - {ZFS_DELEG_PERM_GROUPUSED}, - {ZFS_DELEG_PERM_HOLD}, - {ZFS_DELEG_PERM_RELEASE}, - {NULL} -}; - -static int -zfs_valid_permission_name(const char *perm) -{ - if (zfs_deleg_canonicalize_perm(perm)) - return (0); - - return (permset_namecheck(perm, NULL, NULL)); -} - -const char * -zfs_deleg_canonicalize_perm(const char *perm) -{ - int i; - zfs_prop_t prop; - - for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) { - if (strcmp(perm, zfs_deleg_perm_tab[i].z_perm) == 0) - return (perm); - } - - prop = zfs_name_to_prop(perm); - if (prop != ZPROP_INVAL && zfs_prop_delegatable(prop)) - return (zfs_prop_to_name(prop)); - return (NULL); - -} - -static int -zfs_validate_who(char *who) -{ - char *p; - - if (who[2] != ZFS_DELEG_FIELD_SEP_CHR) - return (-1); - - switch (who[0]) { - case ZFS_DELEG_USER: - case ZFS_DELEG_GROUP: - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_GROUP_SETS: - if (who[1] != ZFS_DELEG_LOCAL && 
who[1] != ZFS_DELEG_DESCENDENT) - return (-1); - for (p = &who[3]; *p; p++) - if (!isdigit(*p)) - return (-1); - break; - - case ZFS_DELEG_NAMED_SET: - case ZFS_DELEG_NAMED_SET_SETS: - if (who[1] != ZFS_DELEG_NA) - return (-1); - return (permset_namecheck(&who[3], NULL, NULL)); - - case ZFS_DELEG_CREATE: - case ZFS_DELEG_CREATE_SETS: - if (who[1] != ZFS_DELEG_NA) - return (-1); - if (who[3] != '\0') - return (-1); - break; - - case ZFS_DELEG_EVERYONE: - case ZFS_DELEG_EVERYONE_SETS: - if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) - return (-1); - if (who[3] != '\0') - return (-1); - break; - - default: - return (-1); - } - - return (0); -} - -int -zfs_deleg_verify_nvlist(nvlist_t *nvp) -{ - nvpair_t *who, *perm_name; - nvlist_t *perms; - int error; - - if (nvp == NULL) - return (-1); - - who = nvlist_next_nvpair(nvp, NULL); - if (who == NULL) - return (-1); - - do { - if (zfs_validate_who(nvpair_name(who))) - return (-1); - - error = nvlist_lookup_nvlist(nvp, nvpair_name(who), &perms); - - if (error && error != ENOENT) - return (-1); - if (error == ENOENT) - continue; - - perm_name = nvlist_next_nvpair(perms, NULL); - if (perm_name == NULL) { - return (-1); - } - do { - error = zfs_valid_permission_name( - nvpair_name(perm_name)); - if (error) - return (-1); - } while ((perm_name = nvlist_next_nvpair(perms, perm_name)) - != NULL); - } while ((who = nvlist_next_nvpair(nvp, who)) != NULL); - return (0); -} - -/* - * Construct the base attribute name. The base attribute names - * are the "key" to locate the jump objects which contain the actual - * permissions. The base attribute names are encoded based on - * type of entry and whether it is a local or descendent permission. - * - * Arguments: - * attr - attribute name return string, attribute is assumed to be - * ZFS_MAX_DELEG_NAME long. 
- * type - type of entry to construct - * inheritchr - inheritance type (local,descendent, or NA for create and - * permission set definitions - * data - is either a permission set name or a 64 bit uid/gid. - */ -void -zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, - char inheritchr, void *data) -{ - int len = ZFS_MAX_DELEG_NAME; - uint64_t *id = data; - - switch (type) { - case ZFS_DELEG_USER: - case ZFS_DELEG_GROUP: - case ZFS_DELEG_USER_SETS: - case ZFS_DELEG_GROUP_SETS: - (void) snprintf(attr, len, "%c%c%c%lld", type, inheritchr, - ZFS_DELEG_FIELD_SEP_CHR, (longlong_t)*id); - break; - case ZFS_DELEG_NAMED_SET_SETS: - case ZFS_DELEG_NAMED_SET: - (void) snprintf(attr, len, "%c-%c%s", type, - ZFS_DELEG_FIELD_SEP_CHR, (char *)data); - break; - case ZFS_DELEG_CREATE: - case ZFS_DELEG_CREATE_SETS: - (void) snprintf(attr, len, "%c-%c", type, - ZFS_DELEG_FIELD_SEP_CHR); - break; - case ZFS_DELEG_EVERYONE: - case ZFS_DELEG_EVERYONE_SETS: - (void) snprintf(attr, len, "%c%c%c", type, inheritchr, - ZFS_DELEG_FIELD_SEP_CHR); - break; - default: - ASSERT(!"bad zfs_deleg_who_type_t"); - } -} diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h deleted file mode 100644 index 06d2df9bb80d..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_deleg.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. - */ - -#ifndef _ZFS_DELEG_H -#define _ZFS_DELEG_H - -#include <sys/fs/zfs.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#define ZFS_DELEG_SET_NAME_CHR '@' /* set name lead char */ -#define ZFS_DELEG_FIELD_SEP_CHR '$' /* field separator */ - -/* - * Max name length for a delegation attribute - */ -#define ZFS_MAX_DELEG_NAME 128 - -#define ZFS_DELEG_LOCAL 'l' -#define ZFS_DELEG_DESCENDENT 'd' -#define ZFS_DELEG_NA '-' - -typedef enum { - ZFS_DELEG_NOTE_CREATE, - ZFS_DELEG_NOTE_DESTROY, - ZFS_DELEG_NOTE_SNAPSHOT, - ZFS_DELEG_NOTE_ROLLBACK, - ZFS_DELEG_NOTE_CLONE, - ZFS_DELEG_NOTE_PROMOTE, - ZFS_DELEG_NOTE_RENAME, - ZFS_DELEG_NOTE_SEND, - ZFS_DELEG_NOTE_RECEIVE, - ZFS_DELEG_NOTE_ALLOW, - ZFS_DELEG_NOTE_USERPROP, - ZFS_DELEG_NOTE_MOUNT, - ZFS_DELEG_NOTE_SHARE, - ZFS_DELEG_NOTE_USERQUOTA, - ZFS_DELEG_NOTE_GROUPQUOTA, - ZFS_DELEG_NOTE_USERUSED, - ZFS_DELEG_NOTE_GROUPUSED, - ZFS_DELEG_NOTE_HOLD, - ZFS_DELEG_NOTE_RELEASE, - ZFS_DELEG_NOTE_DIFF, - ZFS_DELEG_NOTE_BOOKMARK, - ZFS_DELEG_NOTE_REMAP, - ZFS_DELEG_NOTE_NONE -} zfs_deleg_note_t; - -typedef struct zfs_deleg_perm_tab { - char *z_perm; - zfs_deleg_note_t z_note; -} zfs_deleg_perm_tab_t; - -extern zfs_deleg_perm_tab_t zfs_deleg_perm_tab[]; - -int zfs_deleg_verify_nvlist(nvlist_t *nvlist); -void zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, - char checkflag, void *data); -const char *zfs_deleg_canonicalize_perm(const char *perm); - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFS_DELEG_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c 
b/sys/cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c deleted file mode 100644 index c889169b426b..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -/* - * Fletcher Checksums - * ------------------ - * - * ZFS's 2nd and 4th order Fletcher checksums are defined by the following - * recurrence relations: - * - * a = a + f - * i i-1 i-1 - * - * b = b + a - * i i-1 i - * - * c = c + b (fletcher-4 only) - * i i-1 i - * - * d = d + c (fletcher-4 only) - * i i-1 i - * - * Where - * a_0 = b_0 = c_0 = d_0 = 0 - * and - * f_0 .. f_(n-1) are the input data. 
- * - * Using standard techniques, these translate into the following series: - * - * __n_ __n_ - * \ | \ | - * a = > f b = > i * f - * n /___| n - i n /___| n - i - * i = 1 i = 1 - * - * - * __n_ __n_ - * \ | i*(i+1) \ | i*(i+1)*(i+2) - * c = > ------- f d = > ------------- f - * n /___| 2 n - i n /___| 6 n - i - * i = 1 i = 1 - * - * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. - * Since the additions are done mod (2^64), errors in the high bits may not - * be noticed. For this reason, fletcher-2 is deprecated. - * - * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. - * A conservative estimate of how big the buffer can get before we overflow - * can be estimated using f_i = 0xffffffff for all i: - * - * % bc - * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 - * 2264 - * quit - * % - * - * So blocks of up to 2k will not overflow. Our largest block size is - * 128k, which has 32k 4-byte words, so we can compute the largest possible - * accumulators, then divide by 2^64 to figure the max amount of overflow: - * - * % bc - * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } - * a/2^64;b/2^64;c/2^64;d/2^64 - * 0 - * 0 - * 1365 - * 11186858 - * quit - * % - * - * So a and b cannot overflow. To make sure each bit of input has some - * effect on the contents of c and d, we can look at what the factors of - * the coefficients in the equations for c_n and d_n are. The number of 2s - * in the factors determines the lowest set bit in the multiplier. Running - * through the cases for n*(n+1)/2 reveals that the highest power of 2 is - * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow - * the 64-bit accumulators, every bit of every f_i effects every accumulator, - * even for 128k blocks. 
- * - * If we wanted to make a stronger version of fletcher4 (fletcher4c?), - * we could do our calculations mod (2^32 - 1) by adding in the carries - * periodically, and store the number of carries in the top 32-bits. - * - * -------------------- - * Checksum Performance - * -------------------- - * - * There are two interesting components to checksum performance: cached and - * uncached performance. With cached data, fletcher-2 is about four times - * faster than fletcher-4. With uncached data, the performance difference is - * negligible, since the cost of a cache fill dominates the processing time. - * Even though fletcher-4 is slower than fletcher-2, it is still a pretty - * efficient pass over the data. - * - * In normal operation, the data which is being checksummed is in a buffer - * which has been filled either by: - * - * 1. a compression step, which will be mostly cached, or - * 2. a bcopy() or copyin(), which will be uncached (because the - * copy is cache-bypassing). - * - * For both cached and uncached data, both fletcher checksums are much faster - * than sha-256, and slower than 'off', which doesn't touch the data at all. 
- */ - -#include <sys/types.h> -#include <sys/sysmacros.h> -#include <sys/byteorder.h> -#include <sys/zio.h> -#include <sys/spa.h> -#include <zfs_fletcher.h> - -void -fletcher_init(zio_cksum_t *zcp) -{ - ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); -} - -int -fletcher_2_incremental_native(void *buf, size_t size, void *data) -{ - zio_cksum_t *zcp = data; - - const uint64_t *ip = buf; - const uint64_t *ipend = ip + (size / sizeof (uint64_t)); - uint64_t a0, b0, a1, b1; - - a0 = zcp->zc_word[0]; - a1 = zcp->zc_word[1]; - b0 = zcp->zc_word[2]; - b1 = zcp->zc_word[3]; - - for (; ip < ipend; ip += 2) { - a0 += ip[0]; - a1 += ip[1]; - b0 += a0; - b1 += a1; - } - - ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); - return (0); -} - -/*ARGSUSED*/ -void -fletcher_2_native(const void *buf, size_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) fletcher_2_incremental_native((void *) buf, size, zcp); -} - -int -fletcher_2_incremental_byteswap(void *buf, size_t size, void *data) -{ - zio_cksum_t *zcp = data; - - const uint64_t *ip = buf; - const uint64_t *ipend = ip + (size / sizeof (uint64_t)); - uint64_t a0, b0, a1, b1; - - a0 = zcp->zc_word[0]; - a1 = zcp->zc_word[1]; - b0 = zcp->zc_word[2]; - b1 = zcp->zc_word[3]; - - for (; ip < ipend; ip += 2) { - a0 += BSWAP_64(ip[0]); - a1 += BSWAP_64(ip[1]); - b0 += a0; - b1 += a1; - } - - ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); - return (0); -} - -/*ARGSUSED*/ -void -fletcher_2_byteswap(const void *buf, size_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) fletcher_2_incremental_byteswap((void *) buf, size, zcp); -} - -int -fletcher_4_incremental_native(void *buf, size_t size, void *data) -{ - zio_cksum_t *zcp = data; - - const uint32_t *ip = buf; - const uint32_t *ipend = ip + (size / sizeof (uint32_t)); - uint64_t a, b, c, d; - - a = zcp->zc_word[0]; - b = zcp->zc_word[1]; - c = zcp->zc_word[2]; - d = zcp->zc_word[3]; - - for (; ip < ipend; ip++) { - a += ip[0]; - b += a; - c += 
b; - d += c; - } - - ZIO_SET_CHECKSUM(zcp, a, b, c, d); - return (0); -} - -/*ARGSUSED*/ -void -fletcher_4_native(const void *buf, size_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) fletcher_4_incremental_native((void *) buf, size, zcp); -} - -int -fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) -{ - zio_cksum_t *zcp = data; - - const uint32_t *ip = buf; - const uint32_t *ipend = ip + (size / sizeof (uint32_t)); - uint64_t a, b, c, d; - - a = zcp->zc_word[0]; - b = zcp->zc_word[1]; - c = zcp->zc_word[2]; - d = zcp->zc_word[3]; - - for (; ip < ipend; ip++) { - a += BSWAP_32(ip[0]); - b += a; - c += b; - d += c; - } - - ZIO_SET_CHECKSUM(zcp, a, b, c, d); - return (0); -} - -/*ARGSUSED*/ -void -fletcher_4_byteswap(const void *buf, size_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) fletcher_4_incremental_byteswap((void *) buf, size, zcp); -} diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_fletcher.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_fletcher.h deleted file mode 100644 index 33c6c728cf61..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_fletcher.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -#ifndef _ZFS_FLETCHER_H -#define _ZFS_FLETCHER_H - -#include <sys/types.h> -#include <sys/spa.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * fletcher checksum functions - */ - -void fletcher_init(zio_cksum_t *); -void fletcher_2_native(const void *, size_t, const void *, zio_cksum_t *); -void fletcher_2_byteswap(const void *, size_t, const void *, zio_cksum_t *); -int fletcher_2_incremental_native(void *, size_t, void *); -int fletcher_2_incremental_byteswap(void *, size_t, void *); -void fletcher_4_native(const void *, size_t, const void *, zio_cksum_t *); -void fletcher_4_byteswap(const void *, size_t, const void *, zio_cksum_t *); -int fletcher_4_incremental_native(void *, size_t, void *); -int fletcher_4_incremental_byteswap(void *, size_t, void *); - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFS_FLETCHER_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c deleted file mode 100644 index e5ac73f96b98..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c +++ /dev/null @@ -1,1380 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2013 Xin Li <delphij@FreeBSD.org>. All rights reserved. - * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Portions Copyright 2005, 2010, Oracle and/or its affiliates. - * All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/cred.h> -#include <sys/dmu.h> -#include <sys/zio.h> -#include <sys/nvpair.h> -#include <sys/dsl_deleg.h> -#include <sys/zfs_ioctl.h> -#include "zfs_namecheck.h" -#include "zfs_ioctl_compat.h" - -static int zfs_version_ioctl = ZFS_IOCVER_CURRENT; -SYSCTL_DECL(_vfs_zfs_version); -SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl, - 0, "ZFS_IOCTL_VERSION"); - -/* - * FreeBSD zfs_cmd compatibility with older binaries - * appropriately remap/extend the zfs_cmd_t structure - */ -void -zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) -{ - zfs_cmd_v15_t *zc_c; - zfs_cmd_v28_t *zc28_c; - zfs_cmd_deadman_t *zcdm_c; - zfs_cmd_zcmd_t *zcmd_c; - zfs_cmd_edbp_t *edbp_c; - zfs_cmd_resume_t *resume_c; - zfs_cmd_inlanes_t *inlanes_c; - - switch (cflag) { - case ZFS_CMD_COMPAT_INLANES: - inlanes_c = (void *)addr; - /* zc */ - strlcpy(zc->zc_name, inlanes_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, inlanes_c->zc_value, MAXPATHLEN * 2); - strlcpy(zc->zc_string, inlanes_c->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zc->field = inlanes_c->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - 
FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - FIELD_COPY(zc_begin_record); - FIELD_COPY(zc_inject_record); - FIELD_COPY(zc_defer_destroy); - FIELD_COPY(zc_flags); - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - FIELD_COPY(zc_resumable); - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_RESUME: - resume_c = (void *)addr; - /* zc */ - strlcpy(zc->zc_name, resume_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, resume_c->zc_value, MAXPATHLEN * 2); - strlcpy(zc->zc_string, resume_c->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zc->field = resume_c->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - FIELD_COPY(zc_begin_record); - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - 
FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(zc->zc_inject_record.zi_func, - resume_c->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - zc->zc_inject_record.zi_nlanes = 1; - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - FIELD_COPY(zc_defer_destroy); - FIELD_COPY(zc_flags); - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - FIELD_COPY(zc_resumable); - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_EDBP: - edbp_c = (void *)addr; - /* zc */ - strlcpy(zc->zc_name, edbp_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, edbp_c->zc_value, MAXPATHLEN * 2); - strlcpy(zc->zc_string, edbp_c->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zc->field = edbp_c->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - zc->zc_begin_record.drr_u.drr_begin = edbp_c->zc_begin_record; - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - 
FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(zc->zc_inject_record.zi_func, - edbp_c->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - zc->zc_inject_record.zi_nlanes = 1; - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - FIELD_COPY(zc_defer_destroy); - FIELD_COPY(zc_flags); - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - zc->zc_resumable = B_FALSE; - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_ZCMD: - zcmd_c = (void *)addr; - /* zc */ - strlcpy(zc->zc_name, zcmd_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, zcmd_c->zc_value, MAXPATHLEN * 2); - strlcpy(zc->zc_string, zcmd_c->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zc->field = zcmd_c->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - zc->zc_begin_record.drr_u.drr_begin = zcmd_c->zc_begin_record; - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - 
FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(zc->zc_inject_record.zi_func, - zcmd_c->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - zc->zc_inject_record.zi_nlanes = 1; - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - - /* boolean_t -> uint32_t */ - zc->zc_defer_destroy = (uint32_t)(zcmd_c->zc_defer_destroy); - zc->zc_flags = 0; - - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - zc->zc_resumable = B_FALSE; - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - - break; - - case ZFS_CMD_COMPAT_DEADMAN: - zcdm_c = (void *)addr; - /* zc */ - strlcpy(zc->zc_name, zcdm_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, zcdm_c->zc_value, MAXPATHLEN * 2); - strlcpy(zc->zc_string, zcdm_c->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zc->field = zcdm_c->field - zc->zc_guid = zcdm_c->zc_guid; - zc->zc_nvlist_conf = zcdm_c->zc_nvlist_conf; - zc->zc_nvlist_conf_size = zcdm_c->zc_nvlist_conf_size; - zc->zc_nvlist_src = zcdm_c->zc_nvlist_src; - zc->zc_nvlist_src_size = zcdm_c->zc_nvlist_src_size; - zc->zc_nvlist_dst = zcdm_c->zc_nvlist_dst; - zc->zc_nvlist_dst_size = zcdm_c->zc_nvlist_dst_size; - zc->zc_cookie = zcdm_c->zc_cookie; - zc->zc_objset_type = zcdm_c->zc_objset_type; - zc->zc_perm_action = zcdm_c->zc_perm_action; - zc->zc_history = zcdm_c->zc_history; - zc->zc_history_len = zcdm_c->zc_history_len; - zc->zc_history_offset = zcdm_c->zc_history_offset; - zc->zc_obj = zcdm_c->zc_obj; - zc->zc_iflags = zcdm_c->zc_iflags; - zc->zc_share = zcdm_c->zc_share; - zc->zc_jailid = zcdm_c->zc_jailid; - zc->zc_objset_stats = zcdm_c->zc_objset_stats; - 
zc->zc_begin_record.drr_u.drr_begin = zcdm_c->zc_begin_record; - zc->zc_defer_destroy = zcdm_c->zc_defer_destroy; - (void)zcdm_c->zc_temphold; - zc->zc_action_handle = zcdm_c->zc_action_handle; - zc->zc_cleanup_fd = zcdm_c->zc_cleanup_fd; - zc->zc_simple = zcdm_c->zc_simple; - zc->zc_resumable = B_FALSE; - zc->zc_sendobj = zcdm_c->zc_sendobj; - zc->zc_fromobj = zcdm_c->zc_fromobj; - zc->zc_createtxg = zcdm_c->zc_createtxg; - zc->zc_stat = zcdm_c->zc_stat; - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(zc->zc_inject_record.zi_func, - resume_c->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - zc->zc_inject_record.zi_nlanes = 1; - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - - /* we always assume zc_nvlist_dst_filled is true */ - zc->zc_nvlist_dst_filled = B_TRUE; -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_V28: - zc28_c = (void *)addr; - - /* zc */ - strlcpy(zc->zc_name, zc28_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, zc28_c->zc_value, MAXPATHLEN * 2); - strlcpy(zc->zc_string, zc28_c->zc_string, MAXPATHLEN); - zc->zc_guid = zc28_c->zc_guid; - zc->zc_nvlist_conf = zc28_c->zc_nvlist_conf; - zc->zc_nvlist_conf_size = zc28_c->zc_nvlist_conf_size; - zc->zc_nvlist_src = zc28_c->zc_nvlist_src; - zc->zc_nvlist_src_size = zc28_c->zc_nvlist_src_size; - zc->zc_nvlist_dst = zc28_c->zc_nvlist_dst; - zc->zc_nvlist_dst_size = zc28_c->zc_nvlist_dst_size; - zc->zc_cookie = zc28_c->zc_cookie; - zc->zc_objset_type = zc28_c->zc_objset_type; - 
zc->zc_perm_action = zc28_c->zc_perm_action; - zc->zc_history = zc28_c->zc_history; - zc->zc_history_len = zc28_c->zc_history_len; - zc->zc_history_offset = zc28_c->zc_history_offset; - zc->zc_obj = zc28_c->zc_obj; - zc->zc_iflags = zc28_c->zc_iflags; - zc->zc_share = zc28_c->zc_share; - zc->zc_jailid = zc28_c->zc_jailid; - zc->zc_objset_stats = zc28_c->zc_objset_stats; - zc->zc_begin_record.drr_u.drr_begin = zc28_c->zc_begin_record; - zc->zc_defer_destroy = zc28_c->zc_defer_destroy; - (void)zc28_c->zc_temphold; - zc->zc_action_handle = zc28_c->zc_action_handle; - zc->zc_cleanup_fd = zc28_c->zc_cleanup_fd; - zc->zc_simple = zc28_c->zc_simple; - zc->zc_resumable = B_FALSE; - zc->zc_sendobj = zc28_c->zc_sendobj; - zc->zc_fromobj = zc28_c->zc_fromobj; - zc->zc_createtxg = zc28_c->zc_createtxg; - zc->zc_stat = zc28_c->zc_stat; - - /* zc->zc_inject_record */ - zc->zc_inject_record.zi_objset = - zc28_c->zc_inject_record.zi_objset; - zc->zc_inject_record.zi_object = - zc28_c->zc_inject_record.zi_object; - zc->zc_inject_record.zi_start = - zc28_c->zc_inject_record.zi_start; - zc->zc_inject_record.zi_end = - zc28_c->zc_inject_record.zi_end; - zc->zc_inject_record.zi_guid = - zc28_c->zc_inject_record.zi_guid; - zc->zc_inject_record.zi_level = - zc28_c->zc_inject_record.zi_level; - zc->zc_inject_record.zi_error = - zc28_c->zc_inject_record.zi_error; - zc->zc_inject_record.zi_type = - zc28_c->zc_inject_record.zi_type; - zc->zc_inject_record.zi_freq = - zc28_c->zc_inject_record.zi_freq; - zc->zc_inject_record.zi_failfast = - zc28_c->zc_inject_record.zi_failfast; - strlcpy(zc->zc_inject_record.zi_func, - zc28_c->zc_inject_record.zi_func, MAXNAMELEN); - zc->zc_inject_record.zi_iotype = - zc28_c->zc_inject_record.zi_iotype; - zc->zc_inject_record.zi_duration = - zc28_c->zc_inject_record.zi_duration; - zc->zc_inject_record.zi_timer = - zc28_c->zc_inject_record.zi_timer; - zc->zc_inject_record.zi_nlanes = 1; - zc->zc_inject_record.zi_cmd = ZINJECT_UNINITIALIZED; - 
zc->zc_inject_record.zi_pad = 0; - break; - - case ZFS_CMD_COMPAT_V15: - zc_c = (void *)addr; - - /* zc */ - strlcpy(zc->zc_name, zc_c->zc_name, MAXPATHLEN); - strlcpy(zc->zc_value, zc_c->zc_value, MAXPATHLEN); - strlcpy(zc->zc_string, zc_c->zc_string, MAXPATHLEN); - zc->zc_guid = zc_c->zc_guid; - zc->zc_nvlist_conf = zc_c->zc_nvlist_conf; - zc->zc_nvlist_conf_size = zc_c->zc_nvlist_conf_size; - zc->zc_nvlist_src = zc_c->zc_nvlist_src; - zc->zc_nvlist_src_size = zc_c->zc_nvlist_src_size; - zc->zc_nvlist_dst = zc_c->zc_nvlist_dst; - zc->zc_nvlist_dst_size = zc_c->zc_nvlist_dst_size; - zc->zc_cookie = zc_c->zc_cookie; - zc->zc_objset_type = zc_c->zc_objset_type; - zc->zc_perm_action = zc_c->zc_perm_action; - zc->zc_history = zc_c->zc_history; - zc->zc_history_len = zc_c->zc_history_len; - zc->zc_history_offset = zc_c->zc_history_offset; - zc->zc_obj = zc_c->zc_obj; - zc->zc_share = zc_c->zc_share; - zc->zc_jailid = zc_c->zc_jailid; - zc->zc_objset_stats = zc_c->zc_objset_stats; - zc->zc_begin_record.drr_u.drr_begin = zc_c->zc_begin_record; - - /* zc->zc_inject_record */ - zc->zc_inject_record.zi_objset = - zc_c->zc_inject_record.zi_objset; - zc->zc_inject_record.zi_object = - zc_c->zc_inject_record.zi_object; - zc->zc_inject_record.zi_start = - zc_c->zc_inject_record.zi_start; - zc->zc_inject_record.zi_end = - zc_c->zc_inject_record.zi_end; - zc->zc_inject_record.zi_guid = - zc_c->zc_inject_record.zi_guid; - zc->zc_inject_record.zi_level = - zc_c->zc_inject_record.zi_level; - zc->zc_inject_record.zi_error = - zc_c->zc_inject_record.zi_error; - zc->zc_inject_record.zi_type = - zc_c->zc_inject_record.zi_type; - zc->zc_inject_record.zi_freq = - zc_c->zc_inject_record.zi_freq; - zc->zc_inject_record.zi_failfast = - zc_c->zc_inject_record.zi_failfast; - break; - } -} - -void -zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request, - const int cflag) -{ - zfs_cmd_v15_t *zc_c; - zfs_cmd_v28_t *zc28_c; - zfs_cmd_deadman_t *zcdm_c; - zfs_cmd_zcmd_t *zcmd_c; - 
zfs_cmd_edbp_t *edbp_c; - zfs_cmd_resume_t *resume_c; - zfs_cmd_inlanes_t *inlanes_c; - - switch (cflag) { - case ZFS_CMD_COMPAT_INLANES: - inlanes_c = (void *)addr; - strlcpy(inlanes_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(inlanes_c->zc_value, zc->zc_value, MAXPATHLEN * 2); - strlcpy(inlanes_c->zc_string, zc->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) inlanes_c->field = zc->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - FIELD_COPY(zc_begin_record); - FIELD_COPY(zc_inject_record); - FIELD_COPY(zc_defer_destroy); - FIELD_COPY(zc_flags); - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_RESUME: - resume_c = (void *)addr; - strlcpy(resume_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(resume_c->zc_value, zc->zc_value, MAXPATHLEN * 2); - strlcpy(resume_c->zc_string, zc->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) resume_c->field = zc->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - 
FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - FIELD_COPY(zc_begin_record); - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(resume_c->zc_inject_record.zi_func, - zc->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - FIELD_COPY(zc_defer_destroy); - FIELD_COPY(zc_flags); - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_EDBP: - edbp_c = (void *)addr; - strlcpy(edbp_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(edbp_c->zc_value, zc->zc_value, MAXPATHLEN * 2); - strlcpy(edbp_c->zc_string, zc->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) edbp_c->field = zc->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - 
FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - edbp_c->zc_begin_record = zc->zc_begin_record.drr_u.drr_begin; - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(resume_c->zc_inject_record.zi_func, - zc->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - FIELD_COPY(zc_defer_destroy); - FIELD_COPY(zc_flags); - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - break; - - case ZFS_CMD_COMPAT_ZCMD: - zcmd_c = (void *)addr; - /* zc */ - strlcpy(zcmd_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(zcmd_c->zc_value, zc->zc_value, MAXPATHLEN * 2); - strlcpy(zcmd_c->zc_string, zc->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zcmd_c->field = zc->field - FIELD_COPY(zc_nvlist_src); - FIELD_COPY(zc_nvlist_src_size); - FIELD_COPY(zc_nvlist_dst); - FIELD_COPY(zc_nvlist_dst_size); - FIELD_COPY(zc_nvlist_dst_filled); - FIELD_COPY(zc_pad2); - FIELD_COPY(zc_history); - FIELD_COPY(zc_guid); - FIELD_COPY(zc_nvlist_conf); - FIELD_COPY(zc_nvlist_conf_size); - FIELD_COPY(zc_cookie); - FIELD_COPY(zc_objset_type); - FIELD_COPY(zc_perm_action); - FIELD_COPY(zc_history_len); - FIELD_COPY(zc_history_offset); - FIELD_COPY(zc_obj); - FIELD_COPY(zc_iflags); - FIELD_COPY(zc_share); - FIELD_COPY(zc_jailid); - FIELD_COPY(zc_objset_stats); - zcmd_c->zc_begin_record = 
zc->zc_begin_record.drr_u.drr_begin; - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(resume_c->zc_inject_record.zi_func, - zc->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); - - /* boolean_t -> uint32_t */ - zcmd_c->zc_defer_destroy = (uint32_t)(zc->zc_defer_destroy); - zcmd_c->zc_temphold = 0; - - FIELD_COPY(zc_action_handle); - FIELD_COPY(zc_cleanup_fd); - FIELD_COPY(zc_simple); - FIELD_COPY(zc_sendobj); - FIELD_COPY(zc_fromobj); - FIELD_COPY(zc_createtxg); - FIELD_COPY(zc_stat); -#undef FIELD_COPY - - break; - - case ZFS_CMD_COMPAT_DEADMAN: - zcdm_c = (void *)addr; - - strlcpy(zcdm_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(zcdm_c->zc_value, zc->zc_value, MAXPATHLEN * 2); - strlcpy(zcdm_c->zc_string, zc->zc_string, MAXPATHLEN); - -#define FIELD_COPY(field) zcdm_c->field = zc->field - zcdm_c->zc_guid = zc->zc_guid; - zcdm_c->zc_nvlist_conf = zc->zc_nvlist_conf; - zcdm_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; - zcdm_c->zc_nvlist_src = zc->zc_nvlist_src; - zcdm_c->zc_nvlist_src_size = zc->zc_nvlist_src_size; - zcdm_c->zc_nvlist_dst = zc->zc_nvlist_dst; - zcdm_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size; - zcdm_c->zc_cookie = zc->zc_cookie; - zcdm_c->zc_objset_type = zc->zc_objset_type; - zcdm_c->zc_perm_action = zc->zc_perm_action; - zcdm_c->zc_history = zc->zc_history; - zcdm_c->zc_history_len = zc->zc_history_len; - zcdm_c->zc_history_offset = zc->zc_history_offset; - zcdm_c->zc_obj = 
zc->zc_obj; - zcdm_c->zc_iflags = zc->zc_iflags; - zcdm_c->zc_share = zc->zc_share; - zcdm_c->zc_jailid = zc->zc_jailid; - zcdm_c->zc_objset_stats = zc->zc_objset_stats; - zcdm_c->zc_begin_record = zc->zc_begin_record.drr_u.drr_begin; - zcdm_c->zc_defer_destroy = zc->zc_defer_destroy; - zcdm_c->zc_temphold = 0; - zcdm_c->zc_action_handle = zc->zc_action_handle; - zcdm_c->zc_cleanup_fd = zc->zc_cleanup_fd; - zcdm_c->zc_simple = zc->zc_simple; - zcdm_c->zc_sendobj = zc->zc_sendobj; - zcdm_c->zc_fromobj = zc->zc_fromobj; - zcdm_c->zc_createtxg = zc->zc_createtxg; - zcdm_c->zc_stat = zc->zc_stat; - FIELD_COPY(zc_inject_record.zi_objset); - FIELD_COPY(zc_inject_record.zi_object); - FIELD_COPY(zc_inject_record.zi_start); - FIELD_COPY(zc_inject_record.zi_end); - FIELD_COPY(zc_inject_record.zi_guid); - FIELD_COPY(zc_inject_record.zi_level); - FIELD_COPY(zc_inject_record.zi_error); - FIELD_COPY(zc_inject_record.zi_type); - FIELD_COPY(zc_inject_record.zi_freq); - FIELD_COPY(zc_inject_record.zi_failfast); - strlcpy(resume_c->zc_inject_record.zi_func, - zc->zc_inject_record.zi_func, MAXNAMELEN); - FIELD_COPY(zc_inject_record.zi_iotype); - FIELD_COPY(zc_inject_record.zi_duration); - FIELD_COPY(zc_inject_record.zi_timer); - FIELD_COPY(zc_inject_record.zi_cmd); - FIELD_COPY(zc_inject_record.zi_pad); -#undef FIELD_COPY -#ifndef _KERNEL - if (request == ZFS_IOC_RECV) - strlcpy(zcdm_c->zc_top_ds, - zc->zc_value + strlen(zc->zc_value) + 1, - (MAXPATHLEN * 2) - strlen(zc->zc_value) - 1); -#endif - break; - - case ZFS_CMD_COMPAT_V28: - zc28_c = (void *)addr; - - strlcpy(zc28_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(zc28_c->zc_value, zc->zc_value, MAXPATHLEN * 2); - strlcpy(zc28_c->zc_string, zc->zc_string, MAXPATHLEN); - zc28_c->zc_guid = zc->zc_guid; - zc28_c->zc_nvlist_conf = zc->zc_nvlist_conf; - zc28_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; - zc28_c->zc_nvlist_src = zc->zc_nvlist_src; - zc28_c->zc_nvlist_src_size = zc->zc_nvlist_src_size; - zc28_c->zc_nvlist_dst = 
zc->zc_nvlist_dst; - zc28_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size; - zc28_c->zc_cookie = zc->zc_cookie; - zc28_c->zc_objset_type = zc->zc_objset_type; - zc28_c->zc_perm_action = zc->zc_perm_action; - zc28_c->zc_history = zc->zc_history; - zc28_c->zc_history_len = zc->zc_history_len; - zc28_c->zc_history_offset = zc->zc_history_offset; - zc28_c->zc_obj = zc->zc_obj; - zc28_c->zc_iflags = zc->zc_iflags; - zc28_c->zc_share = zc->zc_share; - zc28_c->zc_jailid = zc->zc_jailid; - zc28_c->zc_objset_stats = zc->zc_objset_stats; - zc28_c->zc_begin_record = zc->zc_begin_record.drr_u.drr_begin; - zc28_c->zc_defer_destroy = zc->zc_defer_destroy; - zc28_c->zc_temphold = 0; - zc28_c->zc_action_handle = zc->zc_action_handle; - zc28_c->zc_cleanup_fd = zc->zc_cleanup_fd; - zc28_c->zc_simple = zc->zc_simple; - zc28_c->zc_sendobj = zc->zc_sendobj; - zc28_c->zc_fromobj = zc->zc_fromobj; - zc28_c->zc_createtxg = zc->zc_createtxg; - zc28_c->zc_stat = zc->zc_stat; -#ifndef _KERNEL - if (request == ZFS_IOC_RECV) - strlcpy(zc28_c->zc_top_ds, - zc->zc_value + strlen(zc->zc_value) + 1, - MAXPATHLEN * 2 - strlen(zc->zc_value) - 1); -#endif - /* zc_inject_record */ - zc28_c->zc_inject_record.zi_objset = - zc->zc_inject_record.zi_objset; - zc28_c->zc_inject_record.zi_object = - zc->zc_inject_record.zi_object; - zc28_c->zc_inject_record.zi_start = - zc->zc_inject_record.zi_start; - zc28_c->zc_inject_record.zi_end = - zc->zc_inject_record.zi_end; - zc28_c->zc_inject_record.zi_guid = - zc->zc_inject_record.zi_guid; - zc28_c->zc_inject_record.zi_level = - zc->zc_inject_record.zi_level; - zc28_c->zc_inject_record.zi_error = - zc->zc_inject_record.zi_error; - zc28_c->zc_inject_record.zi_type = - zc->zc_inject_record.zi_type; - zc28_c->zc_inject_record.zi_freq = - zc->zc_inject_record.zi_freq; - zc28_c->zc_inject_record.zi_failfast = - zc->zc_inject_record.zi_failfast; - strlcpy(zc28_c->zc_inject_record.zi_func, - zc->zc_inject_record.zi_func, MAXNAMELEN); - zc28_c->zc_inject_record.zi_iotype = 
- zc->zc_inject_record.zi_iotype; - zc28_c->zc_inject_record.zi_duration = - zc->zc_inject_record.zi_duration; - zc28_c->zc_inject_record.zi_timer = - zc->zc_inject_record.zi_timer; - break; - - case ZFS_CMD_COMPAT_V15: - zc_c = (void *)addr; - - /* zc */ - strlcpy(zc_c->zc_name, zc->zc_name, MAXPATHLEN); - strlcpy(zc_c->zc_value, zc->zc_value, MAXPATHLEN); - strlcpy(zc_c->zc_string, zc->zc_string, MAXPATHLEN); - zc_c->zc_guid = zc->zc_guid; - zc_c->zc_nvlist_conf = zc->zc_nvlist_conf; - zc_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; - zc_c->zc_nvlist_src = zc->zc_nvlist_src; - zc_c->zc_nvlist_src_size = zc->zc_nvlist_src_size; - zc_c->zc_nvlist_dst = zc->zc_nvlist_dst; - zc_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size; - zc_c->zc_cookie = zc->zc_cookie; - zc_c->zc_objset_type = zc->zc_objset_type; - zc_c->zc_perm_action = zc->zc_perm_action; - zc_c->zc_history = zc->zc_history; - zc_c->zc_history_len = zc->zc_history_len; - zc_c->zc_history_offset = zc->zc_history_offset; - zc_c->zc_obj = zc->zc_obj; - zc_c->zc_share = zc->zc_share; - zc_c->zc_jailid = zc->zc_jailid; - zc_c->zc_objset_stats = zc->zc_objset_stats; - zc_c->zc_begin_record = zc->zc_begin_record.drr_u.drr_begin; - - /* zc_inject_record */ - zc_c->zc_inject_record.zi_objset = - zc->zc_inject_record.zi_objset; - zc_c->zc_inject_record.zi_object = - zc->zc_inject_record.zi_object; - zc_c->zc_inject_record.zi_start = - zc->zc_inject_record.zi_start; - zc_c->zc_inject_record.zi_end = - zc->zc_inject_record.zi_end; - zc_c->zc_inject_record.zi_guid = - zc->zc_inject_record.zi_guid; - zc_c->zc_inject_record.zi_level = - zc->zc_inject_record.zi_level; - zc_c->zc_inject_record.zi_error = - zc->zc_inject_record.zi_error; - zc_c->zc_inject_record.zi_type = - zc->zc_inject_record.zi_type; - zc_c->zc_inject_record.zi_freq = - zc->zc_inject_record.zi_freq; - zc_c->zc_inject_record.zi_failfast = - zc->zc_inject_record.zi_failfast; - - break; - } -} - -static int -zfs_ioctl_compat_get_nvlist(uint64_t nvl, 
size_t size, int iflag, - nvlist_t **nvp) -{ - char *packed; - int error; - nvlist_t *list = NULL; - - /* - * Read in and unpack the user-supplied nvlist. - */ - if (size == 0) - return (EINVAL); - -#ifdef _KERNEL - packed = kmem_alloc(size, KM_SLEEP); - if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, - iflag)) != 0) { - kmem_free(packed, size); - return (error); - } -#else - packed = (void *)(uintptr_t)nvl; -#endif - - error = nvlist_unpack(packed, size, &list, 0); - -#ifdef _KERNEL - kmem_free(packed, size); -#endif - - if (error != 0) - return (error); - - *nvp = list; - return (0); -} - -static int -zfs_ioctl_compat_put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) -{ - char *packed = NULL; - int error = 0; - size_t size; - - VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); - -#ifdef _KERNEL - packed = kmem_alloc(size, KM_SLEEP); - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - KM_SLEEP) == 0); - - if (ddi_copyout(packed, - (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) - error = EFAULT; - kmem_free(packed, size); -#else - packed = (void *)(uintptr_t)zc->zc_nvlist_dst; - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - 0) == 0); -#endif - - zc->zc_nvlist_dst_size = size; - return (error); -} - -static void -zfs_ioctl_compat_fix_stats_nvlist(nvlist_t *nvl) -{ - nvlist_t **child; - nvlist_t *nvroot = NULL; - vdev_stat_t *vs; - uint_t c, children, nelem; - - if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - zfs_ioctl_compat_fix_stats_nvlist(child[c]); - } - } - - if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0) - zfs_ioctl_compat_fix_stats_nvlist(nvroot); -#ifdef _KERNEL - if ((nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS, -#else - if ((nvlist_lookup_uint64_array(nvl, "stats", -#endif - - (uint64_t **)&vs, &nelem) == 0)) { - nvlist_add_uint64_array(nvl, -#ifdef _KERNEL - "stats", -#else - 
ZPOOL_CONFIG_VDEV_STATS, -#endif - (uint64_t *)vs, nelem); -#ifdef _KERNEL - nvlist_remove(nvl, ZPOOL_CONFIG_VDEV_STATS, -#else - nvlist_remove(nvl, "stats", -#endif - DATA_TYPE_UINT64_ARRAY); - } -} - -static int -zfs_ioctl_compat_fix_stats(zfs_cmd_t *zc, const int nc) -{ - nvlist_t *nv, *nvp = NULL; - nvpair_t *elem; - int error; - - if ((error = zfs_ioctl_compat_get_nvlist(zc->zc_nvlist_dst, - zc->zc_nvlist_dst_size, zc->zc_iflags, &nv)) != 0) - return (error); - - if (nc == 5) { /* ZFS_IOC_POOL_STATS */ - elem = NULL; - while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) { - if (nvpair_value_nvlist(elem, &nvp) == 0) - zfs_ioctl_compat_fix_stats_nvlist(nvp); - } - elem = NULL; - } else - zfs_ioctl_compat_fix_stats_nvlist(nv); - - error = zfs_ioctl_compat_put_nvlist(zc, nv); - - nvlist_free(nv); - - return (error); -} - -static int -zfs_ioctl_compat_pool_get_props(zfs_cmd_t *zc) -{ - nvlist_t *nv, *nva = NULL; - int error; - - if ((error = zfs_ioctl_compat_get_nvlist(zc->zc_nvlist_dst, - zc->zc_nvlist_dst_size, zc->zc_iflags, &nv)) != 0) - return (error); - -#ifdef _KERNEL - if (nvlist_lookup_nvlist(nv, "allocated", &nva) == 0) { - nvlist_add_nvlist(nv, "used", nva); - nvlist_remove(nv, "allocated", DATA_TYPE_NVLIST); - } - - if (nvlist_lookup_nvlist(nv, "free", &nva) == 0) { - nvlist_add_nvlist(nv, "available", nva); - nvlist_remove(nv, "free", DATA_TYPE_NVLIST); - } -#else - if (nvlist_lookup_nvlist(nv, "used", &nva) == 0) { - nvlist_add_nvlist(nv, "allocated", nva); - nvlist_remove(nv, "used", DATA_TYPE_NVLIST); - } - - if (nvlist_lookup_nvlist(nv, "available", &nva) == 0) { - nvlist_add_nvlist(nv, "free", nva); - nvlist_remove(nv, "available", DATA_TYPE_NVLIST); - } -#endif - - error = zfs_ioctl_compat_put_nvlist(zc, nv); - - nvlist_free(nv); - - return (error); -} - -#ifndef _KERNEL -int -zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag) -{ - int nc, ret; - void *zc_c; - unsigned long ncmd; - zfs_iocparm_t zp; - - switch (cflag) { - 
case ZFS_CMD_COMPAT_NONE: - ncmd = _IOWR('Z', request, struct zfs_iocparm); - zp.zfs_cmd = (uint64_t)zc; - zp.zfs_cmd_size = sizeof(zfs_cmd_t); - zp.zfs_ioctl_version = ZFS_IOCVER_CURRENT; - return (ioctl(fd, ncmd, &zp)); - case ZFS_CMD_COMPAT_INLANES: - ncmd = _IOWR('Z', request, struct zfs_iocparm); - zp.zfs_cmd = (uint64_t)zc; - zp.zfs_cmd_size = sizeof(zfs_cmd_inlanes_t); - zp.zfs_ioctl_version = ZFS_IOCVER_INLANES; - return (ioctl(fd, ncmd, &zp)); - case ZFS_CMD_COMPAT_RESUME: - ncmd = _IOWR('Z', request, struct zfs_iocparm); - zp.zfs_cmd = (uint64_t)zc; - zp.zfs_cmd_size = sizeof(zfs_cmd_resume_t); - zp.zfs_ioctl_version = ZFS_IOCVER_RESUME; - return (ioctl(fd, ncmd, &zp)); - case ZFS_CMD_COMPAT_EDBP: - ncmd = _IOWR('Z', request, struct zfs_iocparm); - zp.zfs_cmd = (uint64_t)zc; - zp.zfs_cmd_size = sizeof(zfs_cmd_edbp_t); - zp.zfs_ioctl_version = ZFS_IOCVER_EDBP; - return (ioctl(fd, ncmd, &zp)); - case ZFS_CMD_COMPAT_ZCMD: - ncmd = _IOWR('Z', request, struct zfs_iocparm); - zp.zfs_cmd = (uint64_t)zc; - zp.zfs_cmd_size = sizeof(zfs_cmd_zcmd_t); - zp.zfs_ioctl_version = ZFS_IOCVER_ZCMD; - return (ioctl(fd, ncmd, &zp)); - case ZFS_CMD_COMPAT_LZC: - ncmd = _IOWR('Z', request, struct zfs_cmd); - return (ioctl(fd, ncmd, zc)); - case ZFS_CMD_COMPAT_DEADMAN: - zc_c = malloc(sizeof(zfs_cmd_deadman_t)); - ncmd = _IOWR('Z', request, struct zfs_cmd_deadman); - break; - case ZFS_CMD_COMPAT_V28: - zc_c = malloc(sizeof(zfs_cmd_v28_t)); - ncmd = _IOWR('Z', request, struct zfs_cmd_v28); - break; - case ZFS_CMD_COMPAT_V15: - nc = zfs_ioctl_v28_to_v15[request]; - zc_c = malloc(sizeof(zfs_cmd_v15_t)); - ncmd = _IOWR('Z', nc, struct zfs_cmd_v15); - break; - default: - return (EINVAL); - } - - if (ZFS_IOCREQ(ncmd) == ZFS_IOC_COMPAT_FAIL) - return (ENOTSUP); - - zfs_cmd_compat_put(zc, (caddr_t)zc_c, request, cflag); - - ret = ioctl(fd, ncmd, zc_c); - if (cflag == ZFS_CMD_COMPAT_V15 && - nc == ZFS_IOC_POOL_IMPORT) - ret = ioctl(fd, _IOWR('Z', ZFS_IOC_POOL_CONFIGS, - struct 
zfs_cmd_v15), zc_c); - zfs_cmd_compat_get(zc, (caddr_t)zc_c, cflag); - free(zc_c); - - if (cflag == ZFS_CMD_COMPAT_V15) { - switch (nc) { - case ZFS_IOC_POOL_IMPORT: - case ZFS_IOC_POOL_CONFIGS: - case ZFS_IOC_POOL_STATS: - case ZFS_IOC_POOL_TRYIMPORT: - zfs_ioctl_compat_fix_stats(zc, nc); - break; - case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */ - zfs_ioctl_compat_pool_get_props(zc); - break; - } - } - - return (ret); -} -#else /* _KERNEL */ -int -zfs_ioctl_compat_pre(zfs_cmd_t *zc, int *vec, const int cflag) -{ - int error = 0; - - /* are we creating a clone? */ - if (*vec == ZFS_IOC_CREATE && zc->zc_value[0] != '\0') - *vec = ZFS_IOC_CLONE; - - if (cflag == ZFS_CMD_COMPAT_V15) { - switch (*vec) { - - case 7: /* ZFS_IOC_POOL_SCRUB (v15) */ - zc->zc_cookie = POOL_SCAN_SCRUB; - break; - } - } - - return (error); -} - -void -zfs_ioctl_compat_post(zfs_cmd_t *zc, int vec, const int cflag) -{ - if (cflag == ZFS_CMD_COMPAT_V15) { - switch (vec) { - case ZFS_IOC_POOL_CONFIGS: - case ZFS_IOC_POOL_STATS: - case ZFS_IOC_POOL_TRYIMPORT: - zfs_ioctl_compat_fix_stats(zc, vec); - break; - case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */ - zfs_ioctl_compat_pool_get_props(zc); - break; - } - } -} - -nvlist_t * -zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec, - const int cflag) -{ - nvlist_t *nvl, *tmpnvl, *hnvl; - nvpair_t *elem; - char *poolname, *snapname; - int err; - - if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC || - cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP || - cflag == ZFS_CMD_COMPAT_RESUME || cflag == ZFS_CMD_COMPAT_INLANES) - goto out; - - switch (vec) { - case ZFS_IOC_CREATE: - nvl = fnvlist_alloc(); - fnvlist_add_int32(nvl, "type", zc->zc_objset_type); - if (innvl != NULL) { - fnvlist_add_nvlist(nvl, "props", innvl); - nvlist_free(innvl); - } - return (nvl); - break; - case ZFS_IOC_CLONE: - nvl = fnvlist_alloc(); - fnvlist_add_string(nvl, "origin", zc->zc_value); - if (innvl != NULL) { - fnvlist_add_nvlist(nvl, 
"props", innvl); - nvlist_free(innvl); - } - return (nvl); - break; - case ZFS_IOC_SNAPSHOT: - if (innvl == NULL) - goto out; - nvl = fnvlist_alloc(); - fnvlist_add_nvlist(nvl, "props", innvl); - tmpnvl = fnvlist_alloc(); - snapname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); - fnvlist_add_boolean(tmpnvl, snapname); - kmem_free(snapname, strlen(snapname + 1)); - /* check if we are doing a recursive snapshot */ - if (zc->zc_cookie) - dmu_get_recursive_snaps_nvl(zc->zc_name, zc->zc_value, - tmpnvl); - fnvlist_add_nvlist(nvl, "snaps", tmpnvl); - fnvlist_free(tmpnvl); - nvlist_free(innvl); - /* strip dataset part from zc->zc_name */ - zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; - return (nvl); - break; - case ZFS_IOC_SPACE_SNAPS: - nvl = fnvlist_alloc(); - fnvlist_add_string(nvl, "firstsnap", zc->zc_value); - if (innvl != NULL) - nvlist_free(innvl); - return (nvl); - break; - case ZFS_IOC_DESTROY_SNAPS: - if (innvl == NULL && cflag == ZFS_CMD_COMPAT_DEADMAN) - goto out; - nvl = fnvlist_alloc(); - if (innvl != NULL) { - fnvlist_add_nvlist(nvl, "snaps", innvl); - } else { - /* - * We are probably called by even older binaries, - * allocate and populate nvlist with recursive - * snapshots - */ - if (zfs_component_namecheck(zc->zc_value, NULL, - NULL) == 0) { - tmpnvl = fnvlist_alloc(); - if (dmu_get_recursive_snaps_nvl(zc->zc_name, - zc->zc_value, tmpnvl) == 0) - fnvlist_add_nvlist(nvl, "snaps", - tmpnvl); - nvlist_free(tmpnvl); - } - } - if (innvl != NULL) - nvlist_free(innvl); - /* strip dataset part from zc->zc_name */ - zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; - return (nvl); - break; - case ZFS_IOC_HOLD: - nvl = fnvlist_alloc(); - tmpnvl = fnvlist_alloc(); - if (zc->zc_cleanup_fd != -1) - fnvlist_add_int32(nvl, "cleanup_fd", - (int32_t)zc->zc_cleanup_fd); - if (zc->zc_cookie) { - hnvl = fnvlist_alloc(); - if (dmu_get_recursive_snaps_nvl(zc->zc_name, - zc->zc_value, hnvl) == 0) { - elem = NULL; - while ((elem = nvlist_next_nvpair(hnvl, - elem)) != 
NULL) { - nvlist_add_string(tmpnvl, - nvpair_name(elem), zc->zc_string); - } - } - nvlist_free(hnvl); - } else { - snapname = kmem_asprintf("%s@%s", zc->zc_name, - zc->zc_value); - nvlist_add_string(tmpnvl, snapname, zc->zc_string); - kmem_free(snapname, strlen(snapname + 1)); - } - fnvlist_add_nvlist(nvl, "holds", tmpnvl); - nvlist_free(tmpnvl); - if (innvl != NULL) - nvlist_free(innvl); - /* strip dataset part from zc->zc_name */ - zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; - return (nvl); - break; - case ZFS_IOC_RELEASE: - nvl = fnvlist_alloc(); - tmpnvl = fnvlist_alloc(); - if (zc->zc_cookie) { - hnvl = fnvlist_alloc(); - if (dmu_get_recursive_snaps_nvl(zc->zc_name, - zc->zc_value, hnvl) == 0) { - elem = NULL; - while ((elem = nvlist_next_nvpair(hnvl, - elem)) != NULL) { - fnvlist_add_boolean(tmpnvl, - zc->zc_string); - fnvlist_add_nvlist(nvl, - nvpair_name(elem), tmpnvl); - } - } - nvlist_free(hnvl); - } else { - snapname = kmem_asprintf("%s@%s", zc->zc_name, - zc->zc_value); - fnvlist_add_boolean(tmpnvl, zc->zc_string); - fnvlist_add_nvlist(nvl, snapname, tmpnvl); - kmem_free(snapname, strlen(snapname + 1)); - } - nvlist_free(tmpnvl); - if (innvl != NULL) - nvlist_free(innvl); - /* strip dataset part from zc->zc_name */ - zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; - return (nvl); - break; - } -out: - return (innvl); -} - -nvlist_t * -zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t * outnvl, const int vec, - const int cflag) -{ - nvlist_t *tmpnvl; - - if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC || - cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP || - cflag == ZFS_CMD_COMPAT_RESUME || cflag == ZFS_CMD_COMPAT_INLANES) - return (outnvl); - - switch (vec) { - case ZFS_IOC_SPACE_SNAPS: - (void) nvlist_lookup_uint64(outnvl, "used", &zc->zc_cookie); - (void) nvlist_lookup_uint64(outnvl, "compressed", - &zc->zc_objset_type); - (void) nvlist_lookup_uint64(outnvl, "uncompressed", - &zc->zc_perm_action); - nvlist_free(outnvl); 
- /* return empty outnvl */ - tmpnvl = fnvlist_alloc(); - return (tmpnvl); - break; - case ZFS_IOC_CREATE: - case ZFS_IOC_CLONE: - case ZFS_IOC_HOLD: - case ZFS_IOC_RELEASE: - nvlist_free(outnvl); - /* return empty outnvl */ - tmpnvl = fnvlist_alloc(); - return (tmpnvl); - break; - } - - return (outnvl); -} -#endif /* KERNEL */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h deleted file mode 100644 index 61f1514e3ebd..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h +++ /dev/null @@ -1,543 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved. - * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SYS_ZFS_IOCTL_COMPAT_H -#define _SYS_ZFS_IOCTL_COMPAT_H - -#include <sys/cred.h> -#include <sys/dmu.h> -#include <sys/zio.h> -#include <sys/dsl_deleg.h> -#include <sys/zfs_ioctl.h> - -#ifdef _KERNEL -#include <sys/nvpair.h> -#endif /* _KERNEL */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Backwards ioctl compatibility - */ - -/* ioctl versions for vfs.zfs.version.ioctl */ -#define ZFS_IOCVER_UNDEF -1 -#define ZFS_IOCVER_NONE 0 -#define ZFS_IOCVER_DEADMAN 1 -#define ZFS_IOCVER_LZC 2 -#define ZFS_IOCVER_ZCMD 3 -#define ZFS_IOCVER_EDBP 4 -#define ZFS_IOCVER_RESUME 5 -#define ZFS_IOCVER_INLANES 6 -#define ZFS_IOCVER_PAD 7 -#define ZFS_IOCVER_CURRENT ZFS_IOCVER_PAD - -/* compatibility conversion flag */ -#define ZFS_CMD_COMPAT_NONE 0 -#define ZFS_CMD_COMPAT_V15 1 -#define ZFS_CMD_COMPAT_V28 2 -#define ZFS_CMD_COMPAT_DEADMAN 3 -#define ZFS_CMD_COMPAT_LZC 4 -#define ZFS_CMD_COMPAT_ZCMD 5 -#define ZFS_CMD_COMPAT_EDBP 6 -#define ZFS_CMD_COMPAT_RESUME 7 -#define ZFS_CMD_COMPAT_INLANES 8 - -#define ZFS_IOC_COMPAT_PASS 254 -#define ZFS_IOC_COMPAT_FAIL 255 - -#define ZFS_IOCREQ(ioreq) ((ioreq) & 0xff) - -typedef struct zfs_iocparm { - uint32_t zfs_ioctl_version; - uint64_t zfs_cmd; - uint64_t zfs_cmd_size; -} zfs_iocparm_t; - -typedef struct zinject_record_v15 { - uint64_t zi_objset; - uint64_t zi_object; - uint64_t zi_start; - uint64_t zi_end; - uint64_t zi_guid; - uint32_t zi_level; - uint32_t zi_error; - uint64_t zi_type; - uint32_t zi_freq; - uint32_t zi_failfast; -} zinject_record_v15_t; - -typedef struct zfs_cmd_v15 { - char zc_name[MAXPATHLEN]; - char zc_value[MAXPATHLEN]; - char zc_string[MAXNAMELEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t 
zc_history; /* really (char *) */ - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - struct drr_begin zc_begin_record; - zinject_record_v15_t zc_inject_record; -} zfs_cmd_v15_t; - -typedef struct zinject_record_v28 { - uint64_t zi_objset; - uint64_t zi_object; - uint64_t zi_start; - uint64_t zi_end; - uint64_t zi_guid; - uint32_t zi_level; - uint32_t zi_error; - uint64_t zi_type; - uint32_t zi_freq; - uint32_t zi_failfast; - char zi_func[MAXNAMELEN]; - uint32_t zi_iotype; - int32_t zi_duration; - uint64_t zi_timer; -} zinject_record_v28_t; - -typedef struct zfs_cmd_v28 { - char zc_name[MAXPATHLEN]; - char zc_value[MAXPATHLEN * 2]; - char zc_string[MAXNAMELEN]; - char zc_top_ds[MAXPATHLEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t zc_history; /* really (char *) */ - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - uint64_t zc_iflags; /* internal to zfs(7fs) */ - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - struct drr_begin zc_begin_record; - zinject_record_v28_t zc_inject_record; - boolean_t zc_defer_destroy; - boolean_t zc_temphold; - uint64_t zc_action_handle; - int zc_cleanup_fd; - uint8_t zc_simple; - uint8_t zc_pad[3]; /* alignment */ - uint64_t zc_sendobj; - uint64_t zc_fromobj; - uint64_t zc_createtxg; - zfs_stat_t zc_stat; -} zfs_cmd_v28_t; - -typedef struct zinject_record_deadman { - uint64_t zi_objset; - uint64_t zi_object; - uint64_t zi_start; - uint64_t zi_end; - uint64_t zi_guid; - uint32_t zi_level; - uint32_t zi_error; - uint64_t zi_type; - uint32_t zi_freq; - uint32_t 
zi_failfast; - char zi_func[MAXNAMELEN]; - uint32_t zi_iotype; - int32_t zi_duration; - uint64_t zi_timer; - uint32_t zi_cmd; - uint32_t zi_pad; -} zinject_record_deadman_t; - -typedef struct zfs_cmd_deadman { - char zc_name[MAXPATHLEN]; - char zc_value[MAXPATHLEN * 2]; - char zc_string[MAXNAMELEN]; - char zc_top_ds[MAXPATHLEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t zc_history; /* really (char *) */ - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - uint64_t zc_iflags; /* internal to zfs(7fs) */ - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - struct drr_begin zc_begin_record; - /* zc_inject_record doesn't change in libzfs_core */ - zinject_record_deadman_t zc_inject_record; - boolean_t zc_defer_destroy; - boolean_t zc_temphold; - uint64_t zc_action_handle; - int zc_cleanup_fd; - uint8_t zc_simple; - uint8_t zc_pad[3]; /* alignment */ - uint64_t zc_sendobj; - uint64_t zc_fromobj; - uint64_t zc_createtxg; - zfs_stat_t zc_stat; -} zfs_cmd_deadman_t; - -typedef struct zfs_cmd_zcmd { - char zc_name[MAXPATHLEN]; /* name of pool or dataset */ - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ - int zc_pad2; - - /* - * The following members are for legacy ioctls which haven't been - * converted to the new method. 
- */ - uint64_t zc_history; /* really (char *) */ - char zc_value[MAXPATHLEN * 2]; - char zc_string[MAXNAMELEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - uint64_t zc_iflags; /* internal to zfs(7fs) */ - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - struct drr_begin zc_begin_record; - zinject_record_deadman_t zc_inject_record; - boolean_t zc_defer_destroy; - boolean_t zc_temphold; - uint64_t zc_action_handle; - int zc_cleanup_fd; - uint8_t zc_simple; - uint8_t zc_pad[3]; /* alignment */ - uint64_t zc_sendobj; - uint64_t zc_fromobj; - uint64_t zc_createtxg; - zfs_stat_t zc_stat; -} zfs_cmd_zcmd_t; - -typedef struct zfs_cmd_edbp { - char zc_name[MAXPATHLEN]; /* name of pool or dataset */ - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ - int zc_pad2; - - /* - * The following members are for legacy ioctls which haven't been - * converted to the new method. 
- */ - uint64_t zc_history; /* really (char *) */ - char zc_value[MAXPATHLEN * 2]; - char zc_string[MAXNAMELEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - uint64_t zc_iflags; /* internal to zfs(7fs) */ - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - struct drr_begin zc_begin_record; - zinject_record_deadman_t zc_inject_record; - uint32_t zc_defer_destroy; - uint32_t zc_flags; - uint64_t zc_action_handle; - int zc_cleanup_fd; - uint8_t zc_simple; - uint8_t zc_pad[3]; /* alignment */ - uint64_t zc_sendobj; - uint64_t zc_fromobj; - uint64_t zc_createtxg; - zfs_stat_t zc_stat; -} zfs_cmd_edbp_t; - -typedef struct zfs_cmd_resume { - char zc_name[MAXPATHLEN]; /* name of pool or dataset */ - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ - int zc_pad2; - - /* - * The following members are for legacy ioctls which haven't been - * converted to the new method. 
- */ - uint64_t zc_history; /* really (char *) */ - char zc_value[MAXPATHLEN * 2]; - char zc_string[MAXNAMELEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - uint64_t zc_iflags; /* internal to zfs(7fs) */ - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - dmu_replay_record_t zc_begin_record; - zinject_record_deadman_t zc_inject_record; - uint32_t zc_defer_destroy; - uint32_t zc_flags; - uint64_t zc_action_handle; - int zc_cleanup_fd; - uint8_t zc_simple; - boolean_t zc_resumable; - uint64_t zc_sendobj; - uint64_t zc_fromobj; - uint64_t zc_createtxg; - zfs_stat_t zc_stat; -} zfs_cmd_resume_t; - -typedef struct zfs_cmd_inlanes { - char zc_name[MAXPATHLEN]; /* name of pool or dataset */ - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; - boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ - int zc_pad2; - - /* - * The following members are for legacy ioctls which haven't been - * converted to the new method. 
- */ - uint64_t zc_history; /* really (char *) */ - char zc_value[MAXPATHLEN * 2]; - char zc_string[MAXNAMELEN]; - uint64_t zc_guid; - uint64_t zc_nvlist_conf; /* really (char *) */ - uint64_t zc_nvlist_conf_size; - uint64_t zc_cookie; - uint64_t zc_objset_type; - uint64_t zc_perm_action; - uint64_t zc_history_len; - uint64_t zc_history_offset; - uint64_t zc_obj; - uint64_t zc_iflags; /* internal to zfs(7fs) */ - zfs_share_t zc_share; - uint64_t zc_jailid; - dmu_objset_stats_t zc_objset_stats; - dmu_replay_record_t zc_begin_record; - zinject_record_t zc_inject_record; - uint32_t zc_defer_destroy; - uint32_t zc_flags; - uint64_t zc_action_handle; - int zc_cleanup_fd; - uint8_t zc_simple; - boolean_t zc_resumable; - uint64_t zc_sendobj; - uint64_t zc_fromobj; - uint64_t zc_createtxg; - zfs_stat_t zc_stat; -} zfs_cmd_inlanes_t; - -#ifdef _KERNEL -unsigned static long zfs_ioctl_v15_to_v28[] = { - 0, /* 0 ZFS_IOC_POOL_CREATE */ - 1, /* 1 ZFS_IOC_POOL_DESTROY */ - 2, /* 2 ZFS_IOC_POOL_IMPORT */ - 3, /* 3 ZFS_IOC_POOL_EXPORT */ - 4, /* 4 ZFS_IOC_POOL_CONFIGS */ - 5, /* 5 ZFS_IOC_POOL_STATS */ - 6, /* 6 ZFS_IOC_POOL_TRYIMPORT */ - 7, /* 7 ZFS_IOC_POOL_SCRUB */ - 8, /* 8 ZFS_IOC_POOL_FREEZE */ - 9, /* 9 ZFS_IOC_POOL_UPGRADE */ - 10, /* 10 ZFS_IOC_POOL_GET_HISTORY */ - 11, /* 11 ZFS_IOC_VDEV_ADD */ - 12, /* 12 ZFS_IOC_VDEV_REMOVE */ - 13, /* 13 ZFS_IOC_VDEV_SET_STATE */ - 14, /* 14 ZFS_IOC_VDEV_ATTACH */ - 15, /* 15 ZFS_IOC_VDEV_DETACH */ - 16, /* 16 ZFS_IOC_VDEV_SETPATH */ - 18, /* 17 ZFS_IOC_OBJSET_STATS */ - 19, /* 18 ZFS_IOC_OBJSET_ZPLPROPS */ - 20, /* 19 ZFS_IOC_DATASET_LIST_NEXT */ - 21, /* 20 ZFS_IOC_SNAPSHOT_LIST_NEXT */ - 22, /* 21 ZFS_IOC_SET_PROP */ - ZFS_IOC_COMPAT_PASS, /* 22 ZFS_IOC_CREATE_MINOR */ - ZFS_IOC_COMPAT_PASS, /* 23 ZFS_IOC_REMOVE_MINOR */ - 23, /* 24 ZFS_IOC_CREATE */ - 24, /* 25 ZFS_IOC_DESTROY */ - 25, /* 26 ZFS_IOC_ROLLBACK */ - 26, /* 27 ZFS_IOC_RENAME */ - 27, /* 28 ZFS_IOC_RECV */ - 28, /* 29 ZFS_IOC_SEND */ - 29, /* 30 ZFS_IOC_INJECT_FAULT */ 
- 30, /* 31 ZFS_IOC_CLEAR_FAULT */ - 31, /* 32 ZFS_IOC_INJECT_LIST_NEXT */ - 32, /* 33 ZFS_IOC_ERROR_LOG */ - 33, /* 34 ZFS_IOC_CLEAR */ - 34, /* 35 ZFS_IOC_PROMOTE */ - 35, /* 36 ZFS_IOC_DESTROY_SNAPS */ - 36, /* 37 ZFS_IOC_SNAPSHOT */ - 37, /* 38 ZFS_IOC_DSOBJ_TO_DSNAME */ - 38, /* 39 ZFS_IOC_OBJ_TO_PATH */ - 39, /* 40 ZFS_IOC_POOL_SET_PROPS */ - 40, /* 41 ZFS_IOC_POOL_GET_PROPS */ - 41, /* 42 ZFS_IOC_SET_FSACL */ - 42, /* 43 ZFS_IOC_GET_FSACL */ - ZFS_IOC_COMPAT_PASS, /* 44 ZFS_IOC_ISCSI_PERM_CHECK */ - 43, /* 45 ZFS_IOC_SHARE */ - 44, /* 46 ZFS_IOC_IHNERIT_PROP */ - 58, /* 47 ZFS_IOC_JAIL */ - 59, /* 48 ZFS_IOC_UNJAIL */ - 45, /* 49 ZFS_IOC_SMB_ACL */ - 46, /* 50 ZFS_IOC_USERSPACE_ONE */ - 47, /* 51 ZFS_IOC_USERSPACE_MANY */ - 48, /* 52 ZFS_IOC_USERSPACE_UPGRADE */ - 17, /* 53 ZFS_IOC_SETFRU */ -}; - -#else /* KERNEL */ -unsigned static long zfs_ioctl_v28_to_v15[] = { - 0, /* 0 ZFS_IOC_POOL_CREATE */ - 1, /* 1 ZFS_IOC_POOL_DESTROY */ - 2, /* 2 ZFS_IOC_POOL_IMPORT */ - 3, /* 3 ZFS_IOC_POOL_EXPORT */ - 4, /* 4 ZFS_IOC_POOL_CONFIGS */ - 5, /* 5 ZFS_IOC_POOL_STATS */ - 6, /* 6 ZFS_IOC_POOL_TRYIMPORT */ - 7, /* 7 ZFS_IOC_POOL_SCAN */ - 8, /* 8 ZFS_IOC_POOL_FREEZE */ - 9, /* 9 ZFS_IOC_POOL_UPGRADE */ - 10, /* 10 ZFS_IOC_POOL_GET_HISTORY */ - 11, /* 11 ZFS_IOC_VDEV_ADD */ - 12, /* 12 ZFS_IOC_VDEV_REMOVE */ - 13, /* 13 ZFS_IOC_VDEV_SET_STATE */ - 14, /* 14 ZFS_IOC_VDEV_ATTACH */ - 15, /* 15 ZFS_IOC_VDEV_DETACH */ - 16, /* 16 ZFS_IOC_VDEV_SETPATH */ - 53, /* 17 ZFS_IOC_VDEV_SETFRU */ - 17, /* 18 ZFS_IOC_OBJSET_STATS */ - 18, /* 19 ZFS_IOC_OBJSET_ZPLPROPS */ - 19, /* 20 ZFS_IOC_DATASET_LIST_NEXT */ - 20, /* 21 ZFS_IOC_SNAPSHOT_LIST_NEXT */ - 21, /* 22 ZFS_IOC_SET_PROP */ - 24, /* 23 ZFS_IOC_CREATE */ - 25, /* 24 ZFS_IOC_DESTROY */ - 26, /* 25 ZFS_IOC_ROLLBACK */ - 27, /* 26 ZFS_IOC_RENAME */ - 28, /* 27 ZFS_IOC_RECV */ - 29, /* 28 ZFS_IOC_SEND */ - 30, /* 39 ZFS_IOC_INJECT_FAULT */ - 31, /* 30 ZFS_IOC_CLEAR_FAULT */ - 32, /* 31 ZFS_IOC_INJECT_LIST_NEXT */ - 33, /* 32 
ZFS_IOC_ERROR_LOG */ - 34, /* 33 ZFS_IOC_CLEAR */ - 35, /* 34 ZFS_IOC_PROMOTE */ - 36, /* 35 ZFS_IOC_DESTROY_SNAPS */ - 37, /* 36 ZFS_IOC_SNAPSHOT */ - 38, /* 37 ZFS_IOC_DSOBJ_TO_DSNAME */ - 39, /* 38 ZFS_IOC_OBJ_TO_PATH */ - 40, /* 39 ZFS_IOC_POOL_SET_PROPS */ - 41, /* 40 ZFS_IOC_POOL_GET_PROPS */ - 42, /* 41 ZFS_IOC_SET_FSACL */ - 43, /* 42 ZFS_IOC_GET_FSACL */ - 45, /* 43 ZFS_IOC_SHARE */ - 46, /* 44 ZFS_IOC_IHNERIT_PROP */ - 49, /* 45 ZFS_IOC_SMB_ACL */ - 50, /* 46 ZFS_IOC_USERSPACE_ONE */ - 51, /* 47 ZFS_IOC_USERSPACE_MANY */ - 52, /* 48 ZFS_IOC_USERSPACE_UPGRADE */ - ZFS_IOC_COMPAT_FAIL, /* 49 ZFS_IOC_HOLD */ - ZFS_IOC_COMPAT_FAIL, /* 50 ZFS_IOC_RELEASE */ - ZFS_IOC_COMPAT_FAIL, /* 51 ZFS_IOC_GET_HOLDS */ - ZFS_IOC_COMPAT_FAIL, /* 52 ZFS_IOC_OBJSET_RECVD_PROPS */ - ZFS_IOC_COMPAT_FAIL, /* 53 ZFS_IOC_VDEV_SPLIT */ - ZFS_IOC_COMPAT_FAIL, /* 54 ZFS_IOC_NEXT_OBJ */ - ZFS_IOC_COMPAT_FAIL, /* 55 ZFS_IOC_DIFF */ - ZFS_IOC_COMPAT_FAIL, /* 56 ZFS_IOC_TMP_SNAPSHOT */ - ZFS_IOC_COMPAT_FAIL, /* 57 ZFS_IOC_OBJ_TO_STATS */ - 47, /* 58 ZFS_IOC_JAIL */ - 48, /* 59 ZFS_IOC_UNJAIL */ -}; -#endif /* ! 
_KERNEL */ - -#ifdef _KERNEL -int zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int); -void zfs_ioctl_compat_post(zfs_cmd_t *, const int, const int); -nvlist_t *zfs_ioctl_compat_innvl(zfs_cmd_t *, nvlist_t *, const int, - const int); -nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int, - const int); -#else -int zcmd_ioctl_compat(int, int, zfs_cmd_t *, const int); -#endif /* _KERNEL */ -void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int); -void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_ZFS_IOCTL_COMPAT_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c deleted file mode 100644 index bad8f20e6917..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2013, 2016 by Delphix. All rights reserved. - */ - -/* - * Common name validation routines for ZFS. 
These routines are shared by the - * userland code as well as the ioctl() layer to ensure that we don't - * inadvertently expose a hole through direct ioctl()s that never gets tested. - * In userland, however, we want significantly more information about _why_ the - * name is invalid. In the kernel, we only care whether it's valid or not. - * Each routine therefore takes a 'namecheck_err_t' which describes exactly why - * the name failed to validate. - */ - -#if defined(_KERNEL) -#include <sys/systm.h> -#else -#include <string.h> -#endif - -#include <sys/dsl_dir.h> -#include <sys/param.h> -#include <sys/nvpair.h> -#include "zfs_namecheck.h" -#include "zfs_deleg.h" - -/* - * Deeply nested datasets can overflow the stack, so we put a limit - * in the amount of nesting a path can have. zfs_max_dataset_nesting - * can be tuned temporarily to fix existing datasets that exceed our - * predefined limit. - */ -int zfs_max_dataset_nesting = 50; - -static int -valid_char(char c) -{ - return ((c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - c == '-' || c == '_' || c == '.' || c == ':' || c == ' '); -} - -/* - * Looks at a path and returns its level of nesting (depth). - */ -int -get_dataset_depth(const char *path) -{ - const char *loc = path; - int nesting = 0; - - /* - * Keep track of nesting until you hit the end of the - * path or found the snapshot/bookmark seperator. - */ - for (int i = 0; loc[i] != '\0' && - loc[i] != '@' && - loc[i] != '#'; i++) { - if (loc[i] == '/') - nesting++; - } - - return (nesting); -} - -/* - * Snapshot names must be made up of alphanumeric characters plus the following - * characters: - * - * [-_.: ] - * - * Returns 0 on success, -1 on error. 
- */ -int -zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what) -{ - const char *loc; - - if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) { - if (why) - *why = NAME_ERR_TOOLONG; - return (-1); - } - - if (path[0] == '\0') { - if (why) - *why = NAME_ERR_EMPTY_COMPONENT; - return (-1); - } - - for (loc = path; *loc; loc++) { - if (!valid_char(*loc)) { - if (why) { - *why = NAME_ERR_INVALCHAR; - *what = *loc; - } - return (-1); - } - } - return (0); -} - - -/* - * Permissions set name must start with the letter '@' followed by the - * same character restrictions as snapshot names, except that the name - * cannot exceed 64 characters. - * - * Returns 0 on success, -1 on error. - */ -int -permset_namecheck(const char *path, namecheck_err_t *why, char *what) -{ - if (strlen(path) >= ZFS_PERMSET_MAXLEN) { - if (why) - *why = NAME_ERR_TOOLONG; - return (-1); - } - - if (path[0] != '@') { - if (why) { - *why = NAME_ERR_NO_AT; - *what = path[0]; - } - return (-1); - } - - return (zfs_component_namecheck(&path[1], why, what)); -} - -/* - * Dataset paths should not be deeper than zfs_max_dataset_nesting - * in terms of nesting. - * - * Returns 0 on success, -1 on error. - */ -int -dataset_nestcheck(const char *path) -{ - return ((get_dataset_depth(path) < zfs_max_dataset_nesting) ? 0 : -1); -} - -/* - * Entity names must be of the following form: - * - * [component/]*[component][(@|#)component]? - * - * Where each component is made up of alphanumeric characters plus the following - * characters: - * - * [-_.:%] - * - * We allow '%' here as we use that character internally to create unique - * names for temporary clones (for online recv). - * - * Returns 0 on success, -1 on error. - */ -int -entity_namecheck(const char *path, namecheck_err_t *why, char *what) -{ - const char *end; - - /* - * Make sure the name is not too long. 
- */ - if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) { - if (why) - *why = NAME_ERR_TOOLONG; - return (-1); - } - - /* Explicitly check for a leading slash. */ - if (path[0] == '/') { - if (why) - *why = NAME_ERR_LEADING_SLASH; - return (-1); - } - - if (path[0] == '\0') { - if (why) - *why = NAME_ERR_EMPTY_COMPONENT; - return (-1); - } - - const char *start = path; - boolean_t found_delim = B_FALSE; - for (;;) { - /* Find the end of this component */ - end = start; - while (*end != '/' && *end != '@' && *end != '#' && - *end != '\0') - end++; - - if (*end == '\0' && end[-1] == '/') { - /* trailing slashes are not allowed */ - if (why) - *why = NAME_ERR_TRAILING_SLASH; - return (-1); - } - - /* Validate the contents of this component */ - for (const char *loc = start; loc != end; loc++) { - if (!valid_char(*loc) && *loc != '%') { - if (why) { - *why = NAME_ERR_INVALCHAR; - *what = *loc; - } - return (-1); - } - } - - /* Snapshot or bookmark delimiter found */ - if (*end == '@' || *end == '#') { - /* Multiple delimiters are not allowed */ - if (found_delim != 0) { - if (why) - *why = NAME_ERR_MULTIPLE_DELIMITERS; - return (-1); - } - - found_delim = B_TRUE; - } - - /* Zero-length components are not allowed */ - if (start == end) { - if (why) - *why = NAME_ERR_EMPTY_COMPONENT; - return (-1); - } - - /* If we've reached the end of the string, we're OK */ - if (*end == '\0') - return (0); - - /* - * If there is a '/' in a snapshot or bookmark name - * then report an error - */ - if (*end == '/' && found_delim != 0) { - if (why) - *why = NAME_ERR_TRAILING_SLASH; - return (-1); - } - - /* Update to the next component */ - start = end + 1; - } -} - -/* - * Dataset is any entity, except bookmark - */ -int -dataset_namecheck(const char *path, namecheck_err_t *why, char *what) -{ - int ret = entity_namecheck(path, why, what); - - if (ret == 0 && strchr(path, '#') != NULL) { - if (why != NULL) { - *why = NAME_ERR_INVALCHAR; - *what = '#'; - } - return (-1); - } - - return 
(ret); -} - -/* - * mountpoint names must be of the following form: - * - * /[component][/]*[component][/] - * - * Returns 0 on success, -1 on error. - */ -int -mountpoint_namecheck(const char *path, namecheck_err_t *why) -{ - const char *start, *end; - - /* - * Make sure none of the mountpoint component names are too long. - * If a component name is too long then the mkdir of the mountpoint - * will fail but then the mountpoint property will be set to a value - * that can never be mounted. Better to fail before setting the prop. - * Extra slashes are OK, they will be tossed by the mountpoint mkdir. - */ - - if (path == NULL || *path != '/') { - if (why) - *why = NAME_ERR_LEADING_SLASH; - return (-1); - } - - /* Skip leading slash */ - start = &path[1]; - do { - end = start; - while (*end != '/' && *end != '\0') - end++; - - if (end - start >= ZFS_MAX_DATASET_NAME_LEN) { - if (why) - *why = NAME_ERR_TOOLONG; - return (-1); - } - start = end + 1; - - } while (*end != '\0'); - - return (0); -} - -/* - * For pool names, we have the same set of valid characters as described in - * dataset names, with the additional restriction that the pool name must begin - * with a letter. The pool names 'raidz' and 'mirror' are also reserved names - * that cannot be used. - * - * Returns 0 on success, -1 on error. - */ -int -pool_namecheck(const char *pool, namecheck_err_t *why, char *what) -{ - const char *c; - - /* - * Make sure the name is not too long. - * If we're creating a pool with version >= SPA_VERSION_DSL_SCRUB (v11) - * we need to account for additional space needed by the origin ds which - * will also be snapshotted: "poolname"+"/"+"$ORIGIN"+"@"+"$ORIGIN". - * Play it safe and enforce this limit even if the pool version is < 11 - * so it can be upgraded without issues. 
- */ - if (strlen(pool) >= (ZFS_MAX_DATASET_NAME_LEN - 2 - - strlen(ORIGIN_DIR_NAME) * 2)) { - if (why) - *why = NAME_ERR_TOOLONG; - return (-1); - } - - c = pool; - while (*c != '\0') { - if (!valid_char(*c)) { - if (why) { - *why = NAME_ERR_INVALCHAR; - *what = *c; - } - return (-1); - } - c++; - } - - if (!(*pool >= 'a' && *pool <= 'z') && - !(*pool >= 'A' && *pool <= 'Z')) { - if (why) - *why = NAME_ERR_NOLETTER; - return (-1); - } - - if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) { - if (why) - *why = NAME_ERR_RESERVED; - return (-1); - } - - if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) { - if (why) - *why = NAME_ERR_DISKLIKE; - return (-1); - } - - return (0); -} diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h deleted file mode 100644 index 527db92b0cfa..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_namecheck.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2013, 2016 by Delphix. 
All rights reserved. - */ - -#ifndef _ZFS_NAMECHECK_H -#define _ZFS_NAMECHECK_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - NAME_ERR_LEADING_SLASH, /* name begins with leading slash */ - NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */ - NAME_ERR_TRAILING_SLASH, /* name ends with a slash */ - NAME_ERR_INVALCHAR, /* invalid character found */ - NAME_ERR_MULTIPLE_DELIMITERS, /* multiple '@'/'#' delimiters found */ - NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */ - NAME_ERR_RESERVED, /* entire name is reserved */ - NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ - NAME_ERR_TOOLONG, /* name is too long */ - NAME_ERR_NO_AT, /* permission set is missing '@' */ -} namecheck_err_t; - -#define ZFS_PERMSET_MAXLEN 64 - -extern int zfs_max_dataset_nesting; - -int get_dataset_depth(const char *); -int pool_namecheck(const char *, namecheck_err_t *, char *); -int entity_namecheck(const char *, namecheck_err_t *, char *); -int dataset_namecheck(const char *, namecheck_err_t *, char *); -int dataset_nestcheck(const char *); -int mountpoint_namecheck(const char *, namecheck_err_t *); -int zfs_component_namecheck(const char *, namecheck_err_t *, char *); -int permset_namecheck(const char *, namecheck_err_t *, char *); - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFS_NAMECHECK_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c deleted file mode 100644 index ac8da491a9ec..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c +++ /dev/null @@ -1,718 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. - * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - */ - -/* Portions Copyright 2010 Robert Milkowski */ - -#include <sys/zio.h> -#include <sys/spa.h> -#include <sys/u8_textprep.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_ioctl.h> -#include <sys/zfs_znode.h> - -#include "zfs_prop.h" -#include "zfs_deleg.h" - -#if defined(_KERNEL) -#include <sys/systm.h> -#else -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#endif - -static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; - -/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ -const char *zfs_userquota_prop_prefixes[] = { - "userused@", - "userquota@", - "groupused@", - "groupquota@" -}; - -zprop_desc_t * -zfs_prop_get_table(void) -{ - return (zfs_prop_table); -} - -void -zfs_prop_init(void) -{ - static zprop_index_t checksum_table[] = { - { "on", ZIO_CHECKSUM_ON }, - { "off", ZIO_CHECKSUM_OFF }, - { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, - { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, - { "sha256", ZIO_CHECKSUM_SHA256 }, - { "noparity", ZIO_CHECKSUM_NOPARITY }, - { "sha512", ZIO_CHECKSUM_SHA512 }, - { "skein", ZIO_CHECKSUM_SKEIN }, -#ifdef illumos - { "edonr", ZIO_CHECKSUM_EDONR }, -#endif - { NULL } - }; - - static zprop_index_t 
dedup_table[] = { - { "on", ZIO_CHECKSUM_ON }, - { "off", ZIO_CHECKSUM_OFF }, - { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, - { "sha256", ZIO_CHECKSUM_SHA256 }, - { "sha256,verify", - ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, - { "sha512", ZIO_CHECKSUM_SHA512 }, - { "sha512,verify", - ZIO_CHECKSUM_SHA512 | ZIO_CHECKSUM_VERIFY }, - { "skein", ZIO_CHECKSUM_SKEIN }, - { "skein,verify", - ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY }, -#ifdef illumos - { "edonr,verify", - ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY }, -#endif - { NULL } - }; - - static zprop_index_t compress_table[] = { - { "on", ZIO_COMPRESS_ON }, - { "off", ZIO_COMPRESS_OFF }, - { "lzjb", ZIO_COMPRESS_LZJB }, - { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ - { "gzip-1", ZIO_COMPRESS_GZIP_1 }, - { "gzip-2", ZIO_COMPRESS_GZIP_2 }, - { "gzip-3", ZIO_COMPRESS_GZIP_3 }, - { "gzip-4", ZIO_COMPRESS_GZIP_4 }, - { "gzip-5", ZIO_COMPRESS_GZIP_5 }, - { "gzip-6", ZIO_COMPRESS_GZIP_6 }, - { "gzip-7", ZIO_COMPRESS_GZIP_7 }, - { "gzip-8", ZIO_COMPRESS_GZIP_8 }, - { "gzip-9", ZIO_COMPRESS_GZIP_9 }, - { "zle", ZIO_COMPRESS_ZLE }, - { "lz4", ZIO_COMPRESS_LZ4 }, - { NULL } - }; - - static zprop_index_t snapdir_table[] = { - { "hidden", ZFS_SNAPDIR_HIDDEN }, - { "visible", ZFS_SNAPDIR_VISIBLE }, - { NULL } - }; - - static zprop_index_t acl_mode_table[] = { - { "discard", ZFS_ACL_DISCARD }, - { "groupmask", ZFS_ACL_GROUPMASK }, - { "passthrough", ZFS_ACL_PASSTHROUGH }, - { "restricted", ZFS_ACL_RESTRICTED }, - { NULL } - }; - - static zprop_index_t acl_inherit_table[] = { - { "discard", ZFS_ACL_DISCARD }, - { "noallow", ZFS_ACL_NOALLOW }, - { "restricted", ZFS_ACL_RESTRICTED }, - { "passthrough", ZFS_ACL_PASSTHROUGH }, - { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatability */ - { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, - { NULL } - }; - - static zprop_index_t case_table[] = { - { "sensitive", ZFS_CASE_SENSITIVE }, - { "insensitive", ZFS_CASE_INSENSITIVE }, - { "mixed", ZFS_CASE_MIXED }, - { NULL } - }; 
- - static zprop_index_t copies_table[] = { - { "1", 1 }, - { "2", 2 }, - { "3", 3 }, - { NULL } - }; - - /* - * Use the unique flags we have to send to u8_strcmp() and/or - * u8_textprep() to represent the various normalization property - * values. - */ - static zprop_index_t normalize_table[] = { - { "none", 0 }, - { "formD", U8_TEXTPREP_NFD }, - { "formKC", U8_TEXTPREP_NFKC }, - { "formC", U8_TEXTPREP_NFC }, - { "formKD", U8_TEXTPREP_NFKD }, - { NULL } - }; - - static zprop_index_t version_table[] = { - { "1", 1 }, - { "2", 2 }, - { "3", 3 }, - { "4", 4 }, - { "5", 5 }, - { "current", ZPL_VERSION }, - { NULL } - }; - - static zprop_index_t boolean_table[] = { - { "off", 0 }, - { "on", 1 }, - { NULL } - }; - - static zprop_index_t logbias_table[] = { - { "latency", ZFS_LOGBIAS_LATENCY }, - { "throughput", ZFS_LOGBIAS_THROUGHPUT }, - { NULL } - }; - - static zprop_index_t canmount_table[] = { - { "off", ZFS_CANMOUNT_OFF }, - { "on", ZFS_CANMOUNT_ON }, - { "noauto", ZFS_CANMOUNT_NOAUTO }, - { NULL } - }; - - static zprop_index_t cache_table[] = { - { "none", ZFS_CACHE_NONE }, - { "metadata", ZFS_CACHE_METADATA }, - { "all", ZFS_CACHE_ALL }, - { NULL } - }; - - static zprop_index_t sync_table[] = { - { "standard", ZFS_SYNC_STANDARD }, - { "always", ZFS_SYNC_ALWAYS }, - { "disabled", ZFS_SYNC_DISABLED }, - { NULL } - }; - - static zprop_index_t volmode_table[] = { - { "default", ZFS_VOLMODE_DEFAULT }, - { "geom", ZFS_VOLMODE_GEOM }, - { "dev", ZFS_VOLMODE_DEV }, - { "none", ZFS_VOLMODE_NONE }, - { NULL } - }; - - static zprop_index_t dnsize_table[] = { - { "legacy", ZFS_DNSIZE_LEGACY }, - { "auto", ZFS_DNSIZE_AUTO }, - { "1k", ZFS_DNSIZE_1K }, - { "2k", ZFS_DNSIZE_2K }, - { "4k", ZFS_DNSIZE_4K }, - { "8k", ZFS_DNSIZE_8K }, - { "16k", ZFS_DNSIZE_16K }, - { NULL } - }; - - static zprop_index_t redundant_metadata_table[] = { - { "all", ZFS_REDUNDANT_METADATA_ALL }, - { "most", ZFS_REDUNDANT_METADATA_MOST }, - { NULL } - }; - - /* inherit index properties */ - 
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata", - ZFS_REDUNDANT_METADATA_ALL, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "all | most", "REDUND_MD", - redundant_metadata_table); - zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "standard | always | disabled", "SYNC", - sync_table); - zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", - ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME, - "on | off | fletcher2 | fletcher4 | sha256 | sha512 | " - "skein", "CHECKSUM", checksum_table); - zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "on | off | verify | sha256[,verify], sha512[,verify], " - "skein[,verify]", "DEDUP", dedup_table); - zprop_register_index(ZFS_PROP_COMPRESSION, "compression", - ZIO_COMPRESS_DEFAULT, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4", - "COMPRESS", compress_table); - zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "hidden | visible", "SNAPDIR", snapdir_table); - zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "discard | groupmask | passthrough | restricted", "ACLMODE", - acl_mode_table); - zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", - ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "discard | noallow | restricted | passthrough | passthrough-x", - "ACLINHERIT", acl_inherit_table); - zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "1 | 2 | 3", "COPIES", copies_table); - zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", - ZFS_CACHE_ALL, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, - "all | none | metadata", "PRIMARYCACHE", cache_table); - 
zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", - ZFS_CACHE_ALL, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, - "all | none | metadata", "SECONDARYCACHE", cache_table); - zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, - PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "latency | throughput", "LOGBIAS", logbias_table); - zprop_register_index(ZFS_PROP_VOLMODE, "volmode", - ZFS_VOLMODE_DEFAULT, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, - "default | geom | dev | none", "VOLMODE", volmode_table); - - zprop_register_index(ZFS_PROP_DNODESIZE, "dnodesize", - ZFS_DNSIZE_LEGACY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "legacy | auto | 1k | 2k | 4k | 8k | 16k", "DNSIZE", dnsize_table); - - /* inherit index (boolean) properties */ - zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); - zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", - boolean_table); - zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", - boolean_table); - zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", - boolean_table); - zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", - boolean_table); - zprop_register_index(ZFS_PROP_ZONED, "jailed", 0, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM, "on | off", "JAILED", boolean_table); - zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR", - boolean_table); - zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", - boolean_table); - zprop_register_index(ZFS_PROP_NBMAND, 
"nbmand", 0, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", - boolean_table); - - /* default index properties */ - zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, - "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table); - zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, - PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", - "CANMOUNT", canmount_table); - - /* readonly index (boolean) properties */ - zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, - ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); - zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, - PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", - boolean_table); - - /* set once index properties */ - zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, - PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, - "none | formC | formD | formKC | formKD", "NORMALIZATION", - normalize_table); - zprop_register_index(ZFS_PROP_CASE, "casesensitivity", - ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_SNAPSHOT, - "sensitive | insensitive | mixed", "CASE", case_table); - - /* set once index (boolean) properties */ - zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, - "on | off", "UTF8ONLY", boolean_table); - - /* string properties */ - zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN"); - zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, - ZFS_TYPE_SNAPSHOT, "<dataset>[,...]", "CLONES"); - zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", - PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none", - "MOUNTPOINT"); - zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", - PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) 
options", - "SHARENFS"); - zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, - ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, - "filesystem | volume | snapshot | bookmark", "TYPE"); - zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", - PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "on | off | sharemgr(1M) options", "SHARESMB"); - zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", - ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, - "<sensitivity label>", "MLSLABEL"); - zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN, - "receive_resume_token", - NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "<string token>", "RESUMETOK"); - - /* readonly number properties */ - zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, - ZFS_TYPE_DATASET, "<size>", "USED"); - zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL"); - zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, - PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER"); - zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, - PROP_READONLY, ZFS_TYPE_DATASET, - "<1.00x or higher if compressed>", "RATIO"); - zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, - PROP_READONLY, ZFS_TYPE_DATASET, - "<1.00x or higher if compressed>", "REFRATIO"); - zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", - ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, - ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); - zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, - PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", - "USEDSNAP"); - zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, - PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", - "USEDDS"); - zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, - PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", - "USEDCHILD"); - 
zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, - PROP_READONLY, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV"); - zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, - ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS"); - zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, - ZFS_TYPE_DATASET, "<size>", "WRITTEN"); - zprop_register_number(ZFS_PROP_LOGICALUSED, "logicalused", 0, - PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", - "LUSED"); - zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced", - 0, PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "LREFER"); - - /* default number properties */ - zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, - ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA"); - zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, - PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "<size> | none", "RESERV"); - zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, - ZFS_TYPE_VOLUME, "<size>", "VOLSIZE"); - zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, - ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA"); - zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, - PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "<size> | none", "REFRESERV"); - zprop_register_number(ZFS_PROP_FILESYSTEM_LIMIT, "filesystem_limit", - UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, - "<count> | none", "FSLIMIT"); - zprop_register_number(ZFS_PROP_SNAPSHOT_LIMIT, "snapshot_limit", - UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "<count> | none", "SSLIMIT"); - zprop_register_number(ZFS_PROP_FILESYSTEM_COUNT, "filesystem_count", - UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, - "<count>", "FSCOUNT"); - zprop_register_number(ZFS_PROP_SNAPSHOT_COUNT, "snapshot_count", - UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, - "<count>", "SSCOUNT"); - 
zprop_register_number(ZFS_PROP_GUID, "guid", 0, PROP_READONLY, - ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID"); - zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY, - ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG"); - - /* inherit number properties */ - zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", - SPA_OLD_MAXBLOCKSIZE, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE"); - zprop_register_number(ZFS_PROP_SPECIAL_SMALL_BLOCKS, - "special_small_blocks", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, - "zero or 512 to 128K, power of 2", "SPECIAL_SMALL_BLOCKS"); - - /* hidden properties */ - zprop_register_hidden(ZFS_PROP_REMAPTXG, "remaptxg", PROP_TYPE_NUMBER, - PROP_READONLY, ZFS_TYPE_DATASET, "REMAPTXG"); - zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, - PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES"); - zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, - PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "NAME"); - zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", - PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); - zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", - PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, - "STMF_SBD_LU"); - zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", - PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, - "USERACCOUNTING"); - zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, - PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE"); - zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, - PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); - zprop_register_hidden(ZFS_PROP_INCONSISTENT, "inconsistent", - PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT"); - zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING, - PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP"); - - /* oddball properties */ - 
zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, - NULL, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, - "<date>", "CREATION", B_FALSE, B_TRUE, NULL); -} - -boolean_t -zfs_prop_delegatable(zfs_prop_t prop) -{ - zprop_desc_t *pd = &zfs_prop_table[prop]; - - /* The mlslabel property is never delegatable. */ - if (prop == ZFS_PROP_MLSLABEL) - return (B_FALSE); - - return (pd->pd_attr != PROP_READONLY); -} - -/* - * Given a zfs dataset property name, returns the corresponding property ID. - */ -zfs_prop_t -zfs_name_to_prop(const char *propname) -{ - return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); -} - -/* - * For user property names, we allow all lowercase alphanumeric characters, plus - * a few useful punctuation characters. - */ -static int -valid_char(char c) -{ - return ((c >= 'a' && c <= 'z') || - (c >= '0' && c <= '9') || - c == '-' || c == '_' || c == '.' || c == ':'); -} - -/* - * Returns true if this is a valid user-defined property (one with a ':'). - */ -boolean_t -zfs_prop_user(const char *name) -{ - int i; - char c; - boolean_t foundsep = B_FALSE; - - for (i = 0; i < strlen(name); i++) { - c = name[i]; - if (!valid_char(c)) - return (B_FALSE); - if (c == ':') - foundsep = B_TRUE; - } - - if (!foundsep) - return (B_FALSE); - - return (B_TRUE); -} - -/* - * Returns true if this is a valid userspace-type property (one with a '@'). - * Note that after the @, any character is valid (eg, another @, for SID - * user@domain). - */ -boolean_t -zfs_prop_userquota(const char *name) -{ - zfs_userquota_prop_t prop; - - for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { - if (strncmp(name, zfs_userquota_prop_prefixes[prop], - strlen(zfs_userquota_prop_prefixes[prop])) == 0) { - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* - * Returns true if this is a valid written@ property. - * Note that after the @, any character is valid (eg, another @, for - * written@pool/fs@origin). 
- */ -boolean_t -zfs_prop_written(const char *name) -{ - static const char *prefix = "written@"; - return (strncmp(name, prefix, strlen(prefix)) == 0); -} - -/* - * Tables of index types, plus functions to convert between the user view - * (strings) and internal representation (uint64_t). - */ -int -zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) -{ - return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); -} - -int -zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) -{ - return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); -} - -uint64_t -zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) -{ - return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); -} - -/* - * Returns TRUE if the property applies to any of the given dataset types. - */ -boolean_t -zfs_prop_valid_for_type(int prop, zfs_type_t types) -{ - return (zprop_valid_for_type(prop, types)); -} - -zprop_type_t -zfs_prop_get_type(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_proptype); -} - -/* - * Returns TRUE if the property is readonly. - */ -boolean_t -zfs_prop_readonly(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_attr == PROP_READONLY || - zfs_prop_table[prop].pd_attr == PROP_ONETIME); -} - -/* - * Returns TRUE if the property is visible (not hidden). - */ -boolean_t -zfs_prop_visible(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_visible); -} - -/* - * Returns TRUE if the property is only allowed to be set once. - */ -boolean_t -zfs_prop_setonce(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); -} - -const char * -zfs_prop_default_string(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_strdefault); -} - -uint64_t -zfs_prop_default_numeric(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_numdefault); -} - -/* - * Given a dataset property ID, returns the corresponding name. - * Assuming the zfs dataset property ID is valid. 
- */ -const char * -zfs_prop_to_name(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_name); -} - -/* - * Returns TRUE if the property is inheritable. - */ -boolean_t -zfs_prop_inheritable(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || - zfs_prop_table[prop].pd_attr == PROP_ONETIME); -} - -#ifndef _KERNEL - -/* - * Returns a string describing the set of acceptable values for the given - * zfs property, or NULL if it cannot be set. - */ -const char * -zfs_prop_values(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_values); -} - -/* - * Returns TRUE if this property is a string type. Note that index types - * (compression, checksum) are treated as strings in userland, even though they - * are stored numerically on disk. - */ -int -zfs_prop_is_string(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || - zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); -} - -/* - * Returns the column header for the given property. Used only in - * 'zfs list -o', but centralized here with the other property information. - */ -const char * -zfs_prop_column_name(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_colname); -} - -/* - * Returns whether the given property should be displayed right-justified for - * 'zfs list'. - */ -boolean_t -zfs_prop_align_right(zfs_prop_t prop) -{ - return (zfs_prop_table[prop].pd_rightalign); -} - -#endif diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.h deleted file mode 100644 index e604abda131d..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _ZFS_PROP_H -#define _ZFS_PROP_H - -#include <sys/fs/zfs.h> -#include <sys/types.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * For index types (e.g. compression and checksum), we want the numeric value - * in the kernel, but the string value in userland. - */ -typedef enum { - PROP_TYPE_NUMBER, /* numeric value */ - PROP_TYPE_STRING, /* string value */ - PROP_TYPE_INDEX /* numeric value indexed by string */ -} zprop_type_t; - -typedef enum { - PROP_DEFAULT, - PROP_READONLY, - PROP_INHERIT, - /* - * ONETIME properties are a sort of conglomeration of READONLY - * and INHERIT. They can be set only during object creation, - * after that they are READONLY. If not explicitly set during - * creation, they can be inherited. 
- */ - PROP_ONETIME -} zprop_attr_t; - -typedef struct zfs_index { - const char *pi_name; - uint64_t pi_value; -} zprop_index_t; - -typedef struct { - const char *pd_name; /* human-readable property name */ - int pd_propnum; /* property number */ - zprop_type_t pd_proptype; /* string, boolean, index, number */ - const char *pd_strdefault; /* default for strings */ - uint64_t pd_numdefault; /* for boolean / index / number */ - zprop_attr_t pd_attr; /* default, readonly, inherit */ - int pd_types; /* bitfield of valid dataset types */ - /* fs | vol | snap; or pool */ - const char *pd_values; /* string telling acceptable values */ - const char *pd_colname; /* column header for "zfs list" */ - boolean_t pd_rightalign; /* column alignment for "zfs list" */ - boolean_t pd_visible; /* do we list this property with the */ - /* "zfs get" help message */ - const zprop_index_t *pd_table; /* for index properties, a table */ - /* defining the possible values */ - size_t pd_table_size; /* number of entries in pd_table[] */ -} zprop_desc_t; - -/* - * zfs dataset property functions - */ -void zfs_prop_init(void); -zprop_type_t zfs_prop_get_type(zfs_prop_t); -boolean_t zfs_prop_delegatable(zfs_prop_t prop); -zprop_desc_t *zfs_prop_get_table(void); - -/* - * zpool property functions - */ -void zpool_prop_init(void); -zprop_type_t zpool_prop_get_type(zpool_prop_t); -zprop_desc_t *zpool_prop_get_table(void); - -/* - * Common routines to initialize property tables - */ -void zprop_register_impl(int, const char *, zprop_type_t, uint64_t, - const char *, zprop_attr_t, int, const char *, const char *, - boolean_t, boolean_t, const zprop_index_t *); -void zprop_register_string(int, const char *, const char *, - zprop_attr_t attr, int, const char *, const char *); -void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int, - const char *, const char *); -void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int, - const char *, const char *, const 
zprop_index_t *); -void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t, - int, const char *); - -/* - * Common routines for zfs and zpool property management - */ -int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t); -int zprop_name_to_prop(const char *, zfs_type_t); -int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t); -int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t); -uint64_t zprop_random_value(int, uint64_t, zfs_type_t); -const char *zprop_values(int, zfs_type_t); -size_t zprop_width(int, boolean_t *, zfs_type_t); -boolean_t zprop_valid_for_type(int, zfs_type_t); -boolean_t zfs_prop_written(const char *name); - - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFS_PROP_H */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c b/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c deleted file mode 100644 index d17c7fd98043..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright (c) 2014 Integros [integros.com] - */ - -#include <sys/zio.h> -#include <sys/spa.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_ioctl.h> -#include <sys/fs/zfs.h> - -#include "zfs_prop.h" - -#if defined(_KERNEL) -#include <sys/systm.h> -#else -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#endif - -static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS]; - -zprop_desc_t * -zpool_prop_get_table(void) -{ - return (zpool_prop_table); -} - -void -zpool_prop_init(void) -{ - static zprop_index_t boolean_table[] = { - { "off", 0}, - { "on", 1}, - { NULL } - }; - - static zprop_index_t failuremode_table[] = { - { "wait", ZIO_FAILURE_MODE_WAIT }, - { "continue", ZIO_FAILURE_MODE_CONTINUE }, - { "panic", ZIO_FAILURE_MODE_PANIC }, - { NULL } - }; - - /* string properties */ - zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT, - ZFS_TYPE_POOL, "<path>", "ALTROOT"); - zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT, - ZFS_TYPE_POOL, "<filesystem>", "BOOTFS"); - zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, - PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE"); - zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL, - PROP_DEFAULT, ZFS_TYPE_POOL, "<comment-string>", "COMMENT"); - - /* readonly number properties */ - zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<size>", "SIZE"); - zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<size>", "FREE"); - zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<size>", "FREEING"); - zprop_register_number(ZPOOL_PROP_CHECKPOINT, "checkpoint", 0, - PROP_READONLY, ZFS_TYPE_POOL, "<size>", "CKPOINT"); - zprop_register_number(ZPOOL_PROP_LEAKED, "leaked", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<size>", "LEAKED"); - 
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0, - PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC"); - zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0, - PROP_READONLY, ZFS_TYPE_POOL, "<size>", "EXPANDSZ"); - zprop_register_number(ZPOOL_PROP_FRAGMENTATION, "fragmentation", 0, - PROP_READONLY, ZFS_TYPE_POOL, "<percent>", "FRAG"); - zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<size>", "CAP"); - zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<guid>", "GUID"); - zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY, - ZFS_TYPE_POOL, "<state>", "HEALTH"); - zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0, - PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>", - "DEDUP"); - - /* system partition size */ - zprop_register_number(ZPOOL_PROP_BOOTSIZE, "bootsize", 0, PROP_ONETIME, - ZFS_TYPE_POOL, "<size>", "BOOTSIZE"); - - /* default number properties */ - zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, - PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION"); - zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0, - PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO"); - - /* default index (boolean) properties */ - zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, - PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION", - boolean_table); - zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0, - PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table); - zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, - PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS", - boolean_table); - zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, - PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table); - zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0, - PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table); - 
zprop_register_index(ZPOOL_PROP_MULTIHOST, "multihost", 0, - PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "MULTIHOST", - boolean_table); - - /* default index properties */ - zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", - ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL, - "wait | continue | panic", "FAILMODE", failuremode_table); - - /* hidden properties */ - zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, - PROP_READONLY, ZFS_TYPE_POOL, "NAME"); - zprop_register_hidden(ZPOOL_PROP_MAXBLOCKSIZE, "maxblocksize", - PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXBLOCKSIZE"); - zprop_register_hidden(ZPOOL_PROP_TNAME, "tname", PROP_TYPE_STRING, - PROP_ONETIME, ZFS_TYPE_POOL, "TNAME"); - zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize", - PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE"); -} - -/* - * Given a property name and its type, returns the corresponding property ID. - */ -zpool_prop_t -zpool_name_to_prop(const char *propname) -{ - return (zprop_name_to_prop(propname, ZFS_TYPE_POOL)); -} - -/* - * Given a pool property ID, returns the corresponding name. - * Assuming the pool propety ID is valid. - */ -const char * -zpool_prop_to_name(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_name); -} - -zprop_type_t -zpool_prop_get_type(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_proptype); -} - -boolean_t -zpool_prop_readonly(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_attr == PROP_READONLY); -} - -const char * -zpool_prop_default_string(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_strdefault); -} - -uint64_t -zpool_prop_default_numeric(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_numdefault); -} - -/* - * Returns true if this is a valid feature@ property. 
- */ -boolean_t -zpool_prop_feature(const char *name) -{ - static const char *prefix = "feature@"; - return (strncmp(name, prefix, strlen(prefix)) == 0); -} - -/* - * Returns true if this is a valid unsupported@ property. - */ -boolean_t -zpool_prop_unsupported(const char *name) -{ - static const char *prefix = "unsupported@"; - return (strncmp(name, prefix, strlen(prefix)) == 0); -} - -int -zpool_prop_string_to_index(zpool_prop_t prop, const char *string, - uint64_t *index) -{ - return (zprop_string_to_index(prop, string, index, ZFS_TYPE_POOL)); -} - -int -zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index, - const char **string) -{ - return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL)); -} - -uint64_t -zpool_prop_random_value(zpool_prop_t prop, uint64_t seed) -{ - return (zprop_random_value(prop, seed, ZFS_TYPE_POOL)); -} - -#ifndef _KERNEL - -const char * -zpool_prop_values(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_values); -} - -const char * -zpool_prop_column_name(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_colname); -} - -boolean_t -zpool_prop_align_right(zpool_prop_t prop) -{ - return (zpool_prop_table[prop].pd_rightalign); -} -#endif diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c deleted file mode 100644 index ca2e72c5daa4..000000000000 --- a/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2012 by Delphix. All rights reserved. - */ - -/* - * Common routines used by zfs and zpool property management. - */ - -#include <sys/zio.h> -#include <sys/spa.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_ioctl.h> -#include <sys/zfs_znode.h> -#include <sys/fs/zfs.h> - -#include "zfs_prop.h" -#include "zfs_deleg.h" - -#if defined(_KERNEL) -#include <sys/systm.h> -#include <sys/libkern.h> -#else -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#endif - -static zprop_desc_t * -zprop_get_proptable(zfs_type_t type) -{ - if (type == ZFS_TYPE_POOL) - return (zpool_prop_get_table()); - else - return (zfs_prop_get_table()); -} - -static int -zprop_get_numprops(zfs_type_t type) -{ - if (type == ZFS_TYPE_POOL) - return (ZPOOL_NUM_PROPS); - else - return (ZFS_NUM_PROPS); -} - -void -zprop_register_impl(int prop, const char *name, zprop_type_t type, - uint64_t numdefault, const char *strdefault, zprop_attr_t attr, - int objset_types, const char *values, const char *colname, - boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl) -{ - zprop_desc_t *prop_tbl = zprop_get_proptable(objset_types); - zprop_desc_t *pd; - - pd = &prop_tbl[prop]; - - ASSERT(pd->pd_name == NULL || pd->pd_name == name); - ASSERT(name != NULL); - ASSERT(colname != NULL); - - pd->pd_name = name; - pd->pd_propnum = prop; - pd->pd_proptype = type; - pd->pd_numdefault = numdefault; - pd->pd_strdefault = strdefault; - pd->pd_attr = attr; - pd->pd_types = objset_types; - 
pd->pd_values = values; - pd->pd_colname = colname; - pd->pd_rightalign = rightalign; - pd->pd_visible = visible; - pd->pd_table = idx_tbl; - pd->pd_table_size = 0; - while (idx_tbl && (idx_tbl++)->pi_name != NULL) - pd->pd_table_size++; -} - -void -zprop_register_string(int prop, const char *name, const char *def, - zprop_attr_t attr, int objset_types, const char *values, - const char *colname) -{ - zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr, - objset_types, values, colname, B_FALSE, B_TRUE, NULL); - -} - -void -zprop_register_number(int prop, const char *name, uint64_t def, - zprop_attr_t attr, int objset_types, const char *values, - const char *colname) -{ - zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr, - objset_types, values, colname, B_TRUE, B_TRUE, NULL); -} - -void -zprop_register_index(int prop, const char *name, uint64_t def, - zprop_attr_t attr, int objset_types, const char *values, - const char *colname, const zprop_index_t *idx_tbl) -{ - zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr, - objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl); -} - -void -zprop_register_hidden(int prop, const char *name, zprop_type_t type, - zprop_attr_t attr, int objset_types, const char *colname) -{ - zprop_register_impl(prop, name, type, 0, NULL, attr, - objset_types, NULL, colname, - type == PROP_TYPE_NUMBER, B_FALSE, NULL); -} - - -/* - * A comparison function we can use to order indexes into property tables. - */ -static int -zprop_compare(const void *arg1, const void *arg2) -{ - const zprop_desc_t *p1 = *((zprop_desc_t **)arg1); - const zprop_desc_t *p2 = *((zprop_desc_t **)arg2); - boolean_t p1ro, p2ro; - - p1ro = (p1->pd_attr == PROP_READONLY); - p2ro = (p2->pd_attr == PROP_READONLY); - - if (p1ro == p2ro) - return (strcmp(p1->pd_name, p2->pd_name)); - - return (p1ro ? 
-1 : 1); -} - -/* - * Iterate over all properties in the given property table, calling back - * into the specified function for each property. We will continue to - * iterate until we either reach the end or the callback function returns - * something other than ZPROP_CONT. - */ -int -zprop_iter_common(zprop_func func, void *cb, boolean_t show_all, - boolean_t ordered, zfs_type_t type) -{ - int i, j, num_props, size, prop; - zprop_desc_t *prop_tbl; - zprop_desc_t **order; - - prop_tbl = zprop_get_proptable(type); - num_props = zprop_get_numprops(type); - size = num_props * sizeof (zprop_desc_t *); - -#if defined(_KERNEL) - order = kmem_alloc(size, KM_SLEEP); -#else - if ((order = malloc(size)) == NULL) - return (ZPROP_CONT); -#endif - - for (j = 0; j < num_props; j++) - order[j] = &prop_tbl[j]; - - if (ordered) { - qsort((void *)order, num_props, sizeof (zprop_desc_t *), - zprop_compare); - } - - prop = ZPROP_CONT; - for (i = 0; i < num_props; i++) { - if ((order[i]->pd_visible || show_all) && - (func(order[i]->pd_propnum, cb) != ZPROP_CONT)) { - prop = order[i]->pd_propnum; - break; - } - } - -#if defined(_KERNEL) - kmem_free(order, size); -#else - free(order); -#endif - return (prop); -} - -static boolean_t -propname_match(const char *p, size_t len, zprop_desc_t *prop_entry) -{ - const char *propname = prop_entry->pd_name; -#ifndef _KERNEL - const char *colname = prop_entry->pd_colname; - int c; -#endif - - if (len == strlen(propname) && - strncmp(p, propname, len) == 0) - return (B_TRUE); - -#ifndef _KERNEL - if (colname == NULL || len != strlen(colname)) - return (B_FALSE); - - for (c = 0; c < len; c++) - if (p[c] != tolower(colname[c])) - break; - - return (colname[c] == '\0'); -#else - return (B_FALSE); -#endif -} - -typedef struct name_to_prop_cb { - const char *propname; - zprop_desc_t *prop_tbl; -} name_to_prop_cb_t; - -static int -zprop_name_to_prop_cb(int prop, void *cb_data) -{ - name_to_prop_cb_t *data = cb_data; - - if (propname_match(data->propname, 
strlen(data->propname), - &data->prop_tbl[prop])) - return (prop); - - return (ZPROP_CONT); -} - -int -zprop_name_to_prop(const char *propname, zfs_type_t type) -{ - int prop; - name_to_prop_cb_t cb_data; - - cb_data.propname = propname; - cb_data.prop_tbl = zprop_get_proptable(type); - - prop = zprop_iter_common(zprop_name_to_prop_cb, &cb_data, - B_TRUE, B_FALSE, type); - - return (prop == ZPROP_CONT ? ZPROP_INVAL : prop); -} - -int -zprop_string_to_index(int prop, const char *string, uint64_t *index, - zfs_type_t type) -{ - zprop_desc_t *prop_tbl; - const zprop_index_t *idx_tbl; - int i; - - if (prop == ZPROP_INVAL || prop == ZPROP_CONT) - return (-1); - - ASSERT(prop < zprop_get_numprops(type)); - prop_tbl = zprop_get_proptable(type); - if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) - return (-1); - - for (i = 0; idx_tbl[i].pi_name != NULL; i++) { - if (strcmp(string, idx_tbl[i].pi_name) == 0) { - *index = idx_tbl[i].pi_value; - return (0); - } - } - - return (-1); -} - -int -zprop_index_to_string(int prop, uint64_t index, const char **string, - zfs_type_t type) -{ - zprop_desc_t *prop_tbl; - const zprop_index_t *idx_tbl; - int i; - - if (prop == ZPROP_INVAL || prop == ZPROP_CONT) - return (-1); - - ASSERT(prop < zprop_get_numprops(type)); - prop_tbl = zprop_get_proptable(type); - if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) - return (-1); - - for (i = 0; idx_tbl[i].pi_name != NULL; i++) { - if (idx_tbl[i].pi_value == index) { - *string = idx_tbl[i].pi_name; - return (0); - } - } - - return (-1); -} - -/* - * Return a random valid property value. Used by ztest. 
- */ -uint64_t -zprop_random_value(int prop, uint64_t seed, zfs_type_t type) -{ - zprop_desc_t *prop_tbl; - const zprop_index_t *idx_tbl; - - ASSERT((uint_t)prop < zprop_get_numprops(type)); - prop_tbl = zprop_get_proptable(type); - idx_tbl = prop_tbl[prop].pd_table; - - if (idx_tbl == NULL) - return (seed); - - return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value); -} - -const char * -zprop_values(int prop, zfs_type_t type) -{ - zprop_desc_t *prop_tbl; - - ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); - ASSERT(prop < zprop_get_numprops(type)); - - prop_tbl = zprop_get_proptable(type); - - return (prop_tbl[prop].pd_values); -} - -/* - * Returns TRUE if the property applies to any of the given dataset types. - */ -boolean_t -zprop_valid_for_type(int prop, zfs_type_t type) -{ - zprop_desc_t *prop_tbl; - - if (prop == ZPROP_INVAL || prop == ZPROP_CONT) - return (B_FALSE); - - ASSERT(prop < zprop_get_numprops(type)); - prop_tbl = zprop_get_proptable(type); - return ((prop_tbl[prop].pd_types & type) != 0); -} - -#ifndef _KERNEL - -/* - * Determines the minimum width for the column, and indicates whether it's fixed - * or not. Only string columns are non-fixed. - */ -size_t -zprop_width(int prop, boolean_t *fixed, zfs_type_t type) -{ - zprop_desc_t *prop_tbl, *pd; - const zprop_index_t *idx; - size_t ret; - int i; - - ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); - ASSERT(prop < zprop_get_numprops(type)); - - prop_tbl = zprop_get_proptable(type); - pd = &prop_tbl[prop]; - - *fixed = B_TRUE; - - /* - * Start with the width of the column name. - */ - ret = strlen(pd->pd_colname); - - /* - * For fixed-width values, make sure the width is large enough to hold - * any possible value. - */ - switch (pd->pd_proptype) { - case PROP_TYPE_NUMBER: - /* - * The maximum length of a human-readable number is 5 characters - * ("20.4M", for example). 
- */ - if (ret < 5) - ret = 5; - /* - * 'creation' is handled specially because it's a number - * internally, but displayed as a date string. - */ - if (prop == ZFS_PROP_CREATION) - *fixed = B_FALSE; - break; - case PROP_TYPE_INDEX: - idx = prop_tbl[prop].pd_table; - for (i = 0; idx[i].pi_name != NULL; i++) { - if (strlen(idx[i].pi_name) > ret) - ret = strlen(idx[i].pi_name); - } - break; - - case PROP_TYPE_STRING: - *fixed = B_FALSE; - break; - } - - return (ret); -} - -#endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c index 8399be770bb0..3d68a68ba819 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c @@ -67,13 +67,15 @@ * on capital-f functions. */ #include <sys/errno.h> +#include <sys/param.h> +#include <sys/types.h> #ifndef illumos #include <sys/time.h> #endif #include <sys/stat.h> -#include <sys/modctl.h> #include <sys/conf.h> #include <sys/systm.h> +#include <sys/endian.h> #ifdef illumos #include <sys/ddi.h> #include <sys/sunddi.h> @@ -96,7 +98,6 @@ #include <sys/panic.h> #include <sys/priv_impl.h> #endif -#include <sys/policy.h> #ifdef illumos #include <sys/cred_impl.h> #include <sys/procfs_isa.h> @@ -119,6 +120,7 @@ #include <sys/limits.h> #include <sys/linker.h> #include <sys/kdb.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/lock.h> @@ -129,6 +131,13 @@ #include <sys/sx.h> #include <sys/sysctl.h> + +#include <sys/mount.h> +#undef AT_UID +#undef AT_GID +#include <sys/vnode.h> +#include <sys/cred.h> + #include <sys/dtrace_bsd.h> #include <netinet/in.h> @@ -299,8 +308,10 @@ static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ #define ipaddr_t in_addr_t #define mod_modname pathname #define vuprintf vprintf +#ifndef crgetzoneid +#define crgetzoneid(_a) 0 +#endif #define ttoproc(_a) ((_a)->td_proc) -#define crgetzoneid(_a) 0 #define SNOCD 0 #define 
CPU_ON_INTR(_a) 0 @@ -491,7 +502,7 @@ do { \ if ((remp) != NULL) { \ *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ } \ -_NOTE(CONSTCOND) } while (0) +} while (0) /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c index d5be43f0c3d1..4771a67a9f09 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c @@ -35,6 +35,7 @@ #include <sys/atomic.h> #include <sys/errno.h> #include <sys/stat.h> +#include <sys/endian.h> #include <sys/modctl.h> #include <sys/conf.h> #include <sys/systm.h> @@ -54,6 +55,8 @@ #include <sys/dtrace_impl.h> #include <sys/sysmacros.h> #include <sys/proc.h> +#undef AT_UID +#undef AT_GID #include <sys/policy.h> #ifdef illumos #include <util/qsort.h> diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c deleted file mode 100644 index 6d82470d220a..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -/* - * University Copyright- Copyright (c) 1982, 1986, 1988 - * The Regents of the University of California - * All Rights Reserved - * - * University Acknowledgment- Portions of this document are derived from - * software developed by the University of California, Berkeley, and its - * contributors. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/proc.h> -#include <sys/taskq.h> -#include <sys/vnode.h> - -/* Extensible attribute (xva) routines. */ - -/* - * Zero out the structure, set the size of the requested/returned bitmaps, - * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer - * to the returned attributes array. - */ -void -xva_init(xvattr_t *xvap) -{ - bzero(xvap, sizeof (xvattr_t)); - xvap->xva_mapsize = XVA_MAPSIZE; - xvap->xva_magic = XVA_MAGIC; - xvap->xva_vattr.va_mask = AT_XVATTR; - xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0]; -} - -/* - * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t - * structure. Otherwise, returns NULL. - */ -xoptattr_t * -xva_getxoptattr(xvattr_t *xvap) -{ - xoptattr_t *xoap = NULL; - if (xvap->xva_vattr.va_mask & AT_XVATTR) - xoap = &xvap->xva_xoptattrs; - return (xoap); -} - -/* - * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it - * asynchronously using a taskq. This can avoid deadlocks caused by re-entering - * the file system as a result of releasing the vnode. Note, file systems - * already have to handle the race where the vnode is incremented before the - * inactive routine is called and does its locking. 
- * - * Warning: Excessive use of this routine can lead to performance problems. - * This is because taskqs throttle back allocation if too many are created. - */ -void -vn_rele_async(vnode_t *vp, taskq_t *taskq) -{ - VERIFY(vp->v_count > 0); - if (refcount_release_if_not_last(&vp->v_usecount)) { - return; - } - VERIFY(taskq_dispatch((taskq_t *)taskq, - (task_func_t *)vrele, vp, TQ_SLEEP) != 0); -} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.cityhash b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.cityhash deleted file mode 100644 index e558b2a50358..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.cityhash +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2011 Google, Inc. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.cityhash.descrip b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.cityhash.descrip deleted file mode 100644 index f98cb76dfc91..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.cityhash.descrip +++ /dev/null @@ -1 +0,0 @@ -CITYHASH CHECKSUM FUNCTIONALITY IN ZFS diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.lz4 b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.lz4 deleted file mode 100644 index 722cc75f01e9..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.lz4 +++ /dev/null @@ -1,30 +0,0 @@ -LZ4 - Fast LZ compression algorithm -Copyright (C) 2011-2013, Yann Collet. -BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html -- LZ4 source repository : http://code.google.com/p/lz4/ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.lz4.descrip b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.lz4.descrip deleted file mode 100644 index 211f679b5749..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/THIRDPARTYLICENSE.lz4.descrip +++ /dev/null @@ -1 +0,0 @@ -LZ4 COMPRESSION FUNCTIONALITY IN ZFS diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c deleted file mode 100644 index 1843c8161038..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c +++ /dev/null @@ -1,960 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright (c) 2014 by Chunwei Chen. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -/* - * ARC buffer data (ABD). 
- * - * ABDs are an abstract data structure for the ARC which can use two - * different ways of storing the underlying data: - * - * (a) Linear buffer. In this case, all the data in the ABD is stored in one - * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache). - * - * +-------------------+ - * | ABD (linear) | - * | abd_flags = ... | - * | abd_size = ... | +--------------------------------+ - * | abd_buf ------------->| raw buffer of size abd_size | - * +-------------------+ +--------------------------------+ - * no abd_chunks - * - * (b) Scattered buffer. In this case, the data in the ABD is split into - * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers - * to the chunks recorded in an array at the end of the ABD structure. - * - * +-------------------+ - * | ABD (scattered) | - * | abd_flags = ... | - * | abd_size = ... | - * | abd_offset = 0 | +-----------+ - * | abd_chunks[0] ----------------------------->| chunk 0 | - * | abd_chunks[1] ---------------------+ +-----------+ - * | ... | | +-----------+ - * | abd_chunks[N-1] ---------+ +------->| chunk 1 | - * +-------------------+ | +-----------+ - * | ... - * | +-----------+ - * +----------------->| chunk N-1 | - * +-----------+ - * - * Using a large proportion of scattered ABDs decreases ARC fragmentation since - * when we are at the limit of allocatable space, using equal-size chunks will - * allow us to quickly reclaim enough space for a new large allocation (assuming - * it is also scattered). - * - * In addition to directly allocating a linear or scattered ABD, it is also - * possible to create an ABD by requesting the "sub-ABD" starting at an offset - * within an existing ABD. In linear buffers this is simple (set abd_buf of - * the new ABD to the starting point within the original raw buffer), but - * scattered ABDs are a little more complex. The new ABD makes a copy of the - * relevant abd_chunks pointers (but not the underlying data). 
However, to - * provide arbitrary rather than only chunk-aligned starting offsets, it also - * tracks an abd_offset field which represents the starting point of the data - * within the first chunk in abd_chunks. For both linear and scattered ABDs, - * creating an offset ABD marks the original ABD as the offset's parent, and the - * original ABD's abd_children refcount is incremented. This data allows us to - * ensure the root ABD isn't deleted before its children. - * - * Most consumers should never need to know what type of ABD they're using -- - * the ABD public API ensures that it's possible to transparently switch from - * using a linear ABD to a scattered one when doing so would be beneficial. - * - * If you need to use the data within an ABD directly, if you know it's linear - * (because you allocated it) you can use abd_to_buf() to access the underlying - * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions - * which will allocate a raw buffer if necessary. Use the abd_return_buf* - * functions to return any raw buffers that are no longer necessary when you're - * done using them. - * - * There are a variety of ABD APIs that implement basic buffer operations: - * compare, copy, read, write, and fill with zeroes. If you need a custom - * function which progressively accesses the whole ABD, use the abd_iterate_* - * functions. 
- */ - -#include <sys/abd.h> -#include <sys/param.h> -#include <sys/zio.h> -#include <sys/zfs_context.h> -#include <sys/zfs_znode.h> - -typedef struct abd_stats { - kstat_named_t abdstat_struct_size; - kstat_named_t abdstat_scatter_cnt; - kstat_named_t abdstat_scatter_data_size; - kstat_named_t abdstat_scatter_chunk_waste; - kstat_named_t abdstat_linear_cnt; - kstat_named_t abdstat_linear_data_size; -} abd_stats_t; - -static abd_stats_t abd_stats = { - /* Amount of memory occupied by all of the abd_t struct allocations */ - { "struct_size", KSTAT_DATA_UINT64 }, - /* - * The number of scatter ABDs which are currently allocated, excluding - * ABDs which don't own their data (for instance the ones which were - * allocated through abd_get_offset()). - */ - { "scatter_cnt", KSTAT_DATA_UINT64 }, - /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */ - { "scatter_data_size", KSTAT_DATA_UINT64 }, - /* - * The amount of space wasted at the end of the last chunk across all - * scatter ABDs tracked by scatter_cnt. - */ - { "scatter_chunk_waste", KSTAT_DATA_UINT64 }, - /* - * The number of linear ABDs which are currently allocated, excluding - * ABDs which don't own their data (for instance the ones which were - * allocated through abd_get_offset() and abd_get_from_buf()). If an - * ABD takes ownership of its buf then it will become tracked. - */ - { "linear_cnt", KSTAT_DATA_UINT64 }, - /* Amount of data stored in all linear ABDs tracked by linear_cnt */ - { "linear_data_size", KSTAT_DATA_UINT64 }, -}; - -#define ABDSTAT(stat) (abd_stats.stat.value.ui64) -#define ABDSTAT_INCR(stat, val) \ - atomic_add_64(&abd_stats.stat.value.ui64, (val)) -#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) -#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) - -/* - * It is possible to make all future ABDs be linear by setting this to B_FALSE. - * Otherwise, ABDs are allocated scattered by default unless the caller uses - * abd_alloc_linear(). 
- */ -boolean_t zfs_abd_scatter_enabled = B_TRUE; - -/* - * The size of the chunks ABD allocates. Because the sizes allocated from the - * kmem_cache can't change, this tunable can only be modified at boot. Changing - * it at runtime would cause ABD iteration to work incorrectly for ABDs which - * were allocated with the old size, so a safeguard has been put in place which - * will cause the machine to panic if you change it and try to access the data - * within a scattered ABD. - */ -size_t zfs_abd_chunk_size = 4096; - -#if defined(__FreeBSD__) && defined(_KERNEL) -SYSCTL_DECL(_vfs_zfs); - -SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN, - &zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers"); -SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN, - &zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates"); -#endif - -#ifdef _KERNEL -extern vmem_t *zio_alloc_arena; -#endif - -kmem_cache_t *abd_chunk_cache; -static kstat_t *abd_ksp; - -extern inline boolean_t abd_is_linear(abd_t *abd); -extern inline void abd_copy(abd_t *dabd, abd_t *sabd, size_t size); -extern inline void abd_copy_from_buf(abd_t *abd, const void *buf, size_t size); -extern inline void abd_copy_to_buf(void* buf, abd_t *abd, size_t size); -extern inline int abd_cmp_buf(abd_t *abd, const void *buf, size_t size); -extern inline void abd_zero(abd_t *abd, size_t size); - -static void * -abd_alloc_chunk() -{ - void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); - ASSERT3P(c, !=, NULL); - return (c); -} - -static void -abd_free_chunk(void *c) -{ - kmem_cache_free(abd_chunk_cache, c); -} - -void -abd_init(void) -{ -#ifdef illumos - vmem_t *data_alloc_arena = NULL; - -#ifdef _KERNEL - data_alloc_arena = zio_alloc_arena; -#endif - - /* - * Since ABD chunks do not appear in crash dumps, we pass KMC_NOTOUCH - * so that no allocator metadata is stored with the buffers. 
- */ - abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, - NULL, NULL, NULL, NULL, data_alloc_arena, KMC_NOTOUCH); -#else - abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, - NULL, NULL, NULL, NULL, 0, KMC_NOTOUCH | KMC_NODEBUG); -#endif - abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, - sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); - if (abd_ksp != NULL) { - abd_ksp->ks_data = &abd_stats; - kstat_install(abd_ksp); - } -} - -void -abd_fini(void) -{ - if (abd_ksp != NULL) { - kstat_delete(abd_ksp); - abd_ksp = NULL; - } - - kmem_cache_destroy(abd_chunk_cache); - abd_chunk_cache = NULL; -} - -static inline size_t -abd_chunkcnt_for_bytes(size_t size) -{ - return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size); -} - -static inline size_t -abd_scatter_chunkcnt(abd_t *abd) -{ - ASSERT(!abd_is_linear(abd)); - return (abd_chunkcnt_for_bytes( - abd->abd_u.abd_scatter.abd_offset + abd->abd_size)); -} - -static inline void -abd_verify(abd_t *abd) -{ - ASSERT3U(abd->abd_size, >, 0); - ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); - ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | - ABD_FLAG_OWNER | ABD_FLAG_META)); - IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); - IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); - if (abd_is_linear(abd)) { - ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL); - } else { - ASSERT3U(abd->abd_u.abd_scatter.abd_offset, <, - zfs_abd_chunk_size); - size_t n = abd_scatter_chunkcnt(abd); - for (int i = 0; i < n; i++) { - ASSERT3P( - abd->abd_u.abd_scatter.abd_chunks[i], !=, NULL); - } - } -} - -static inline abd_t * -abd_alloc_struct(size_t chunkcnt) -{ - size_t size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); - abd_t *abd = kmem_alloc(size, KM_PUSHPAGE); - ASSERT3P(abd, !=, NULL); - ABDSTAT_INCR(abdstat_struct_size, size); - - return (abd); -} - -static inline void 
-abd_free_struct(abd_t *abd) -{ - size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd); - int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); - kmem_free(abd, size); - ABDSTAT_INCR(abdstat_struct_size, -size); -} - -/* - * Allocate an ABD, along with its own underlying data buffers. Use this if you - * don't care whether the ABD is linear or not. - */ -abd_t * -abd_alloc(size_t size, boolean_t is_metadata) -{ - if (!zfs_abd_scatter_enabled || size <= zfs_abd_chunk_size) - return (abd_alloc_linear(size, is_metadata)); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - size_t n = abd_chunkcnt_for_bytes(size); - abd_t *abd = abd_alloc_struct(n); - - abd->abd_flags = ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - abd->abd_size = size; - abd->abd_parent = NULL; - zfs_refcount_create(&abd->abd_children); - - abd->abd_u.abd_scatter.abd_offset = 0; - abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size; - - for (int i = 0; i < n; i++) { - void *c = abd_alloc_chunk(); - ASSERT3P(c, !=, NULL); - abd->abd_u.abd_scatter.abd_chunks[i] = c; - } - - ABDSTAT_BUMP(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - n * zfs_abd_chunk_size - size); - - return (abd); -} - -static void -abd_free_scatter(abd_t *abd) -{ - size_t n = abd_scatter_chunkcnt(abd); - for (int i = 0; i < n; i++) { - abd_free_chunk(abd->abd_u.abd_scatter.abd_chunks[i]); - } - - zfs_refcount_destroy(&abd->abd_children); - ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - abd->abd_size - n * zfs_abd_chunk_size); - - abd_free_struct(abd); -} - -/* - * Allocate an ABD that must be linear, along with its own underlying data - * buffer. Only use this when it would be very annoying to write your ABD - * consumer with a scattered ABD. 
- */ -abd_t * -abd_alloc_linear(size_t size, boolean_t is_metadata) -{ - abd_t *abd = abd_alloc_struct(0); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - abd->abd_size = size; - abd->abd_parent = NULL; - zfs_refcount_create(&abd->abd_children); - - if (is_metadata) { - abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size); - } else { - abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size); - } - - ABDSTAT_BUMP(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, size); - - return (abd); -} - -static void -abd_free_linear(abd_t *abd) -{ - if (abd->abd_flags & ABD_FLAG_META) { - zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); - } else { - zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); - } - - zfs_refcount_destroy(&abd->abd_children); - ABDSTAT_BUMPDOWN(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); - - abd_free_struct(abd); -} - -/* - * Free an ABD. Only use this on ABDs allocated with abd_alloc() or - * abd_alloc_linear(). - */ -void -abd_free(abd_t *abd) -{ - abd_verify(abd); - ASSERT3P(abd->abd_parent, ==, NULL); - ASSERT(abd->abd_flags & ABD_FLAG_OWNER); - if (abd_is_linear(abd)) - abd_free_linear(abd); - else - abd_free_scatter(abd); -} - -/* - * Allocate an ABD of the same format (same metadata flag, same scatterize - * setting) as another ABD. 
- */ -abd_t * -abd_alloc_sametype(abd_t *sabd, size_t size) -{ - boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; - if (abd_is_linear(sabd)) { - return (abd_alloc_linear(size, is_metadata)); - } else { - return (abd_alloc(size, is_metadata)); - } -} - -/* - * If we're going to use this ABD for doing I/O using the block layer, the - * consumer of the ABD data doesn't care if it's scattered or not, and we don't - * plan to store this ABD in memory for a long period of time, we should - * allocate the ABD type that requires the least data copying to do the I/O. - * - * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os - * using a scatter/gather list we should switch to that and replace this call - * with vanilla abd_alloc(). - */ -abd_t * -abd_alloc_for_io(size_t size, boolean_t is_metadata) -{ - return (abd_alloc_linear(size, is_metadata)); -} - -/* - * Allocate a new ABD to point to offset off of sabd. It shares the underlying - * buffer data with sabd. Use abd_put() to free. sabd must not be freed while - * any derived ABDs exist. - */ -abd_t * -abd_get_offset(abd_t *sabd, size_t off) -{ - abd_t *abd; - - abd_verify(sabd); - ASSERT3U(off, <=, sabd->abd_size); - - if (abd_is_linear(sabd)) { - abd = abd_alloc_struct(0); - - /* - * Even if this buf is filesystem metadata, we only track that - * if we own the underlying data buffer, which is not true in - * this case. Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = ABD_FLAG_LINEAR; - - abd->abd_u.abd_linear.abd_buf = - (char *)sabd->abd_u.abd_linear.abd_buf + off; - } else { - size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off; - size_t chunkcnt = abd_scatter_chunkcnt(sabd) - - (new_offset / zfs_abd_chunk_size); - - abd = abd_alloc_struct(chunkcnt); - - /* - * Even if this buf is filesystem metadata, we only track that - * if we own the underlying data buffer, which is not true in - * this case. Therefore, we don't ever use ABD_FLAG_META here. 
- */ - abd->abd_flags = 0; - - abd->abd_u.abd_scatter.abd_offset = - new_offset % zfs_abd_chunk_size; - abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size; - - /* Copy the scatterlist starting at the correct offset */ - (void) memcpy(&abd->abd_u.abd_scatter.abd_chunks, - &sabd->abd_u.abd_scatter.abd_chunks[new_offset / - zfs_abd_chunk_size], - chunkcnt * sizeof (void *)); - } - - abd->abd_size = sabd->abd_size - off; - abd->abd_parent = sabd; - zfs_refcount_create(&abd->abd_children); - (void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd); - - return (abd); -} - -/* - * Allocate a linear ABD structure for buf. You must free this with abd_put() - * since the resulting ABD doesn't own its own buffer. - */ -abd_t * -abd_get_from_buf(void *buf, size_t size) -{ - abd_t *abd = abd_alloc_struct(0); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - /* - * Even if this buf is filesystem metadata, we only track that if we - * own the underlying data buffer, which is not true in this case. - * Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = ABD_FLAG_LINEAR; - abd->abd_size = size; - abd->abd_parent = NULL; - zfs_refcount_create(&abd->abd_children); - - abd->abd_u.abd_linear.abd_buf = buf; - - return (abd); -} - -/* - * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not - * free the underlying scatterlist or buffer. - */ -void -abd_put(abd_t *abd) -{ - abd_verify(abd); - ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); - - if (abd->abd_parent != NULL) { - (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children, - abd->abd_size, abd); - } - - zfs_refcount_destroy(&abd->abd_children); - abd_free_struct(abd); -} - -/* - * Get the raw buffer associated with a linear ABD. - */ -void * -abd_to_buf(abd_t *abd) -{ - ASSERT(abd_is_linear(abd)); - abd_verify(abd); - return (abd->abd_u.abd_linear.abd_buf); -} - -/* - * Borrow a raw buffer from an ABD without copying the contents of the ABD - * into the buffer. 
If the ABD is scattered, this will allocate a raw buffer - * whose contents are undefined. To copy over the existing data in the ABD, use - * abd_borrow_buf_copy() instead. - */ -void * -abd_borrow_buf(abd_t *abd, size_t n) -{ - void *buf; - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - buf = abd_to_buf(abd); - } else { - buf = zio_buf_alloc(n); - } - (void) zfs_refcount_add_many(&abd->abd_children, n, buf); - - return (buf); -} - -void * -abd_borrow_buf_copy(abd_t *abd, size_t n) -{ - void *buf = abd_borrow_buf(abd, n); - if (!abd_is_linear(abd)) { - abd_copy_to_buf(buf, abd, n); - } - return (buf); -} - -/* - * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will - * not change the contents of the ABD and will ASSERT that you didn't modify - * the buffer since it was borrowed. If you want any changes you made to buf to - * be copied back to abd, use abd_return_buf_copy() instead. - */ -void -abd_return_buf(abd_t *abd, void *buf, size_t n) -{ - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - ASSERT3P(buf, ==, abd_to_buf(abd)); - } else { - ASSERT0(abd_cmp_buf(abd, buf, n)); - zio_buf_free(buf, n); - } - (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); -} - -void -abd_return_buf_copy(abd_t *abd, void *buf, size_t n) -{ - if (!abd_is_linear(abd)) { - abd_copy_from_buf(abd, buf, n); - } - abd_return_buf(abd, buf, n); -} - -/* - * Give this ABD ownership of the buffer that it's storing. Can only be used on - * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated - * with abd_alloc_linear() which subsequently released ownership of their buf - * with abd_release_ownership_of_buf(). 
- */ -void -abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) -{ - ASSERT(abd_is_linear(abd)); - ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); - abd_verify(abd); - - abd->abd_flags |= ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - - ABDSTAT_BUMP(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size); -} - -void -abd_release_ownership_of_buf(abd_t *abd) -{ - ASSERT(abd_is_linear(abd)); - ASSERT(abd->abd_flags & ABD_FLAG_OWNER); - abd_verify(abd); - - abd->abd_flags &= ~ABD_FLAG_OWNER; - /* Disable this flag since we no longer own the data buffer */ - abd->abd_flags &= ~ABD_FLAG_META; - - ABDSTAT_BUMPDOWN(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); -} - -struct abd_iter { - abd_t *iter_abd; /* ABD being iterated through */ - size_t iter_pos; /* position (relative to abd_offset) */ - void *iter_mapaddr; /* addr corresponding to iter_pos */ - size_t iter_mapsize; /* length of data valid at mapaddr */ -}; - -static inline size_t -abd_iter_scatter_chunk_offset(struct abd_iter *aiter) -{ - ASSERT(!abd_is_linear(aiter->iter_abd)); - return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset + - aiter->iter_pos) % zfs_abd_chunk_size); -} - -static inline size_t -abd_iter_scatter_chunk_index(struct abd_iter *aiter) -{ - ASSERT(!abd_is_linear(aiter->iter_abd)); - return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset + - aiter->iter_pos) / zfs_abd_chunk_size); -} - -/* - * Initialize the abd_iter. - */ -static void -abd_iter_init(struct abd_iter *aiter, abd_t *abd) -{ - abd_verify(abd); - aiter->iter_abd = abd; - aiter->iter_pos = 0; - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; -} - -/* - * Advance the iterator by a certain amount. Cannot be called when a chunk is - * in use. This can be safely called when the aiter has already exhausted, in - * which case this does nothing. 
- */ -static void -abd_iter_advance(struct abd_iter *aiter, size_t amount) -{ - ASSERT3P(aiter->iter_mapaddr, ==, NULL); - ASSERT0(aiter->iter_mapsize); - - /* There's nothing left to advance to, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - aiter->iter_pos += amount; -} - -/* - * Map the current chunk into aiter. This can be safely called when the aiter - * has already exhausted, in which case this does nothing. - */ -static void -abd_iter_map(struct abd_iter *aiter) -{ - void *paddr; - size_t offset = 0; - - ASSERT3P(aiter->iter_mapaddr, ==, NULL); - ASSERT0(aiter->iter_mapsize); - - /* Panic if someone has changed zfs_abd_chunk_size */ - IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size == - aiter->iter_abd->abd_u.abd_scatter.abd_chunk_size); - - /* There's nothing left to iterate over, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - if (abd_is_linear(aiter->iter_abd)) { - offset = aiter->iter_pos; - aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; - paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf; - } else { - size_t index = abd_iter_scatter_chunk_index(aiter); - offset = abd_iter_scatter_chunk_offset(aiter); - aiter->iter_mapsize = zfs_abd_chunk_size - offset; - paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index]; - } - aiter->iter_mapaddr = (char *)paddr + offset; -} - -/* - * Unmap the current chunk from aiter. This can be safely called when the aiter - * has already exhausted, in which case this does nothing. 
- */ -static void -abd_iter_unmap(struct abd_iter *aiter) -{ - /* There's nothing left to unmap, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - ASSERT3P(aiter->iter_mapaddr, !=, NULL); - ASSERT3U(aiter->iter_mapsize, >, 0); - - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; -} - -int -abd_iterate_func(abd_t *abd, size_t off, size_t size, - abd_iter_func_t *func, void *private) -{ - int ret = 0; - struct abd_iter aiter; - - abd_verify(abd); - ASSERT3U(off + size, <=, abd->abd_size); - - abd_iter_init(&aiter, abd); - abd_iter_advance(&aiter, off); - - while (size > 0) { - abd_iter_map(&aiter); - - size_t len = MIN(aiter.iter_mapsize, size); - ASSERT3U(len, >, 0); - - ret = func(aiter.iter_mapaddr, len, private); - - abd_iter_unmap(&aiter); - - if (ret != 0) - break; - - size -= len; - abd_iter_advance(&aiter, len); - } - - return (ret); -} - -struct buf_arg { - void *arg_buf; -}; - -static int -abd_copy_to_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(ba_ptr->arg_buf, buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - -/* - * Copy abd to buf. (off is the offset in abd.) - */ -void -abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, - &ba_ptr); -} - -static int -abd_cmp_buf_off_cb(void *buf, size_t size, void *private) -{ - int ret; - struct buf_arg *ba_ptr = private; - - ret = memcmp(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (ret); -} - -/* - * Compare the contents of abd to buf. (off is the offset in abd.) 
- */ -int -abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { (void *) buf }; - - return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); -} - -static int -abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - -/* - * Copy from buf to abd. (off is the offset in abd.) - */ -void -abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { (void *) buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, - &ba_ptr); -} - -/*ARGSUSED*/ -static int -abd_zero_off_cb(void *buf, size_t size, void *private) -{ - (void) memset(buf, 0, size); - return (0); -} - -/* - * Zero out the abd from a particular offset to the end. - */ -void -abd_zero_off(abd_t *abd, size_t off, size_t size) -{ - (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL); -} - -/* - * Iterate over two ABDs and call func incrementally on the two ABDs' data in - * equal-sized chunks (passed to func as raw buffers). func could be called many - * times during this iteration. 
- */ -int -abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, - size_t size, abd_iter_func2_t *func, void *private) -{ - int ret = 0; - struct abd_iter daiter, saiter; - - abd_verify(dabd); - abd_verify(sabd); - - ASSERT3U(doff + size, <=, dabd->abd_size); - ASSERT3U(soff + size, <=, sabd->abd_size); - - abd_iter_init(&daiter, dabd); - abd_iter_init(&saiter, sabd); - abd_iter_advance(&daiter, doff); - abd_iter_advance(&saiter, soff); - - while (size > 0) { - abd_iter_map(&daiter); - abd_iter_map(&saiter); - - size_t dlen = MIN(daiter.iter_mapsize, size); - size_t slen = MIN(saiter.iter_mapsize, size); - size_t len = MIN(dlen, slen); - ASSERT(dlen > 0 || slen > 0); - - ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len, - private); - - abd_iter_unmap(&saiter); - abd_iter_unmap(&daiter); - - if (ret != 0) - break; - - size -= len; - abd_iter_advance(&daiter, len); - abd_iter_advance(&saiter, len); - } - - return (ret); -} - -/*ARGSUSED*/ -static int -abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private) -{ - (void) memcpy(dbuf, sbuf, size); - return (0); -} - -/* - * Copy from sabd to dabd starting from soff and doff. - */ -void -abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size) -{ - (void) abd_iterate_func2(dabd, sabd, doff, soff, size, - abd_copy_off_cb, NULL); -} - -/*ARGSUSED*/ -static int -abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private) -{ - return (memcmp(bufa, bufb, size)); -} - -/* - * Compares the first size bytes of two ABDs. 
- */ -int -abd_cmp(abd_t *dabd, abd_t *sabd, size_t size) -{ - return (abd_iterate_func2(dabd, sabd, 0, 0, size, abd_cmp_cb, NULL)); -} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/aggsum.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/aggsum.c deleted file mode 100644 index 713ff2b0116c..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/aggsum.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * CDDL HEADER START - * - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2017, 2018 by Delphix. All rights reserved. - */ - -#include <sys/zfs_context.h> -#include <sys/aggsum.h> - -/* - * Aggregate-sum counters are a form of fanned-out counter, used when atomic - * instructions on a single field cause enough CPU cache line contention to - * slow system performance. Due to their increased overhead and the expense - * involved with precisely reading from them, they should only be used in cases - * where the write rate (increment/decrement) is much higher than the read rate - * (get value). - * - * Aggregate sum counters are comprised of two basic parts, the core and the - * buckets. The core counter contains a lock for the entire counter, as well - * as the current upper and lower bounds on the value of the counter. The - * aggsum_bucket structure contains a per-bucket lock to protect the contents of - * the bucket, the current amount that this bucket has changed from the global - * counter (called the delta), and the amount of increment and decrement we have - * "borrowed" from the core counter. 
- * - * The basic operation of an aggsum is simple. Threads that wish to modify the - * counter will modify one bucket's counter (determined by their current CPU, to - * help minimize lock and cache contention). If the bucket already has - * sufficient capacity borrowed from the core structure to handle their request, - * they simply modify the delta and return. If the bucket does not, we clear - * the bucket's current state (to prevent the borrowed amounts from getting too - * large), and borrow more from the core counter. Borrowing is done by adding to - * the upper bound (or subtracting from the lower bound) of the core counter, - * and setting the borrow value for the bucket to the amount added (or - * subtracted). Clearing the bucket is the opposite; we add the current delta - * to both the lower and upper bounds of the core counter, subtract the borrowed - * incremental from the upper bound, and add the borrowed decrement from the - * lower bound. Note that only borrowing and clearing require access to the - * core counter; since all other operations access CPU-local resources, - * performance can be much higher than a traditional counter. - * - * Threads that wish to read from the counter have a slightly more challenging - * task. It is fast to determine the upper and lower bounds of the aggum; this - * does not require grabbing any locks. This suffices for cases where an - * approximation of the aggsum's value is acceptable. However, if one needs to - * know whether some specific value is above or below the current value in the - * aggsum, they invoke aggsum_compare(). This function operates by repeatedly - * comparing the target value to the upper and lower bounds of the aggsum, and - * then clearing a bucket. This proceeds until the target is outside of the - * upper and lower bounds and we return a response, or the last bucket has been - * cleared and we know that the target is equal to the aggsum's value. 
Finally, - * the most expensive operation is determining the precise value of the aggsum. - * To do this, we clear every bucket and then return the upper bound (which must - * be equal to the lower bound). What makes aggsum_compare() and aggsum_value() - * expensive is clearing buckets. This involves grabbing the global lock - * (serializing against themselves and borrow operations), grabbing a bucket's - * lock (preventing threads on those CPUs from modifying their delta), and - * zeroing out the borrowed value (forcing that thread to borrow on its next - * request, which will also be expensive). This is what makes aggsums well - * suited for write-many read-rarely operations. - */ - -/* - * We will borrow aggsum_borrow_multiplier times the current request, so we will - * have to get the as_lock approximately every aggsum_borrow_multiplier calls to - * aggsum_delta(). - */ -static uint_t aggsum_borrow_multiplier = 10; - -void -aggsum_init(aggsum_t *as, uint64_t value) -{ - bzero(as, sizeof (*as)); - as->as_lower_bound = as->as_upper_bound = value; - mutex_init(&as->as_lock, NULL, MUTEX_DEFAULT, NULL); - as->as_numbuckets = boot_ncpus; - as->as_buckets = kmem_zalloc(boot_ncpus * sizeof (aggsum_bucket_t), - KM_SLEEP); - for (int i = 0; i < as->as_numbuckets; i++) { - mutex_init(&as->as_buckets[i].asc_lock, - NULL, MUTEX_DEFAULT, NULL); - } -} - -void -aggsum_fini(aggsum_t *as) -{ - for (int i = 0; i < as->as_numbuckets; i++) - mutex_destroy(&as->as_buckets[i].asc_lock); - kmem_free(as->as_buckets, as->as_numbuckets * sizeof (aggsum_bucket_t)); - mutex_destroy(&as->as_lock); -} - -int64_t -aggsum_lower_bound(aggsum_t *as) -{ - return (as->as_lower_bound); -} - -int64_t -aggsum_upper_bound(aggsum_t *as) -{ - return (as->as_upper_bound); -} - -static void -aggsum_flush_bucket(aggsum_t *as, struct aggsum_bucket *asb) -{ - ASSERT(MUTEX_HELD(&as->as_lock)); - ASSERT(MUTEX_HELD(&asb->asc_lock)); - - /* - * We use atomic instructions for this because we read the upper and - 
* lower bounds without the lock, so we need stores to be atomic. - */ - atomic_add_64((volatile uint64_t *)&as->as_lower_bound, - asb->asc_delta + asb->asc_borrowed); - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, - asb->asc_delta - asb->asc_borrowed); - asb->asc_delta = 0; - asb->asc_borrowed = 0; -} - -uint64_t -aggsum_value(aggsum_t *as) -{ - int64_t rv; - - mutex_enter(&as->as_lock); - if (as->as_lower_bound == as->as_upper_bound) { - rv = as->as_lower_bound; - for (int i = 0; i < as->as_numbuckets; i++) { - ASSERT0(as->as_buckets[i].asc_delta); - ASSERT0(as->as_buckets[i].asc_borrowed); - } - mutex_exit(&as->as_lock); - return (rv); - } - for (int i = 0; i < as->as_numbuckets; i++) { - struct aggsum_bucket *asb = &as->as_buckets[i]; - mutex_enter(&asb->asc_lock); - aggsum_flush_bucket(as, asb); - mutex_exit(&asb->asc_lock); - } - VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound); - rv = as->as_lower_bound; - mutex_exit(&as->as_lock); - - return (rv); -} - -void -aggsum_add(aggsum_t *as, int64_t delta) -{ - struct aggsum_bucket *asb = - &as->as_buckets[CPU_SEQID % as->as_numbuckets]; - int64_t borrow; - - /* Try fast path if we already borrowed enough before. */ - mutex_enter(&asb->asc_lock); - if (asb->asc_delta + delta <= (int64_t)asb->asc_borrowed && - asb->asc_delta + delta >= -(int64_t)asb->asc_borrowed) { - asb->asc_delta += delta; - mutex_exit(&asb->asc_lock); - return; - } - mutex_exit(&asb->asc_lock); - - /* - * We haven't borrowed enough. Take the global lock and borrow - * considering what is requested now and what we borrowed before. - */ - borrow = (delta < 0 ? 
-delta : delta) * aggsum_borrow_multiplier; - mutex_enter(&as->as_lock); - mutex_enter(&asb->asc_lock); - delta += asb->asc_delta; - asb->asc_delta = 0; - if (borrow >= asb->asc_borrowed) - borrow -= asb->asc_borrowed; - else - borrow = (borrow - (int64_t)asb->asc_borrowed) / 4; - asb->asc_borrowed += borrow; - atomic_add_64((volatile uint64_t *)&as->as_lower_bound, - delta - borrow); - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, - delta + borrow); - mutex_exit(&asb->asc_lock); - mutex_exit(&as->as_lock); -} - -/* - * Compare the aggsum value to target efficiently. Returns -1 if the value - * represented by the aggsum is less than target, 1 if it's greater, and 0 if - * they are equal. - */ -int -aggsum_compare(aggsum_t *as, uint64_t target) -{ - if (as->as_upper_bound < target) - return (-1); - if (as->as_lower_bound > target) - return (1); - mutex_enter(&as->as_lock); - for (int i = 0; i < as->as_numbuckets; i++) { - struct aggsum_bucket *asb = &as->as_buckets[i]; - mutex_enter(&asb->asc_lock); - aggsum_flush_bucket(as, asb); - mutex_exit(&asb->asc_lock); - if (as->as_upper_bound < target) { - mutex_exit(&as->as_lock); - return (-1); - } - if (as->as_lower_bound > target) { - mutex_exit(&as->as_lock); - return (1); - } - } - VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound); - ASSERT3U(as->as_lower_bound, ==, target); - mutex_exit(&as->as_lock); - return (0); -} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c deleted file mode 100644 index 592fb02cfac1..000000000000 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ /dev/null @@ -1,8569 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. - */ - -/* - * DVA-based Adjustable Replacement Cache - * - * While much of the theory of operation used here is - * based on the self-tuning, low overhead replacement cache - * presented by Megiddo and Modha at FAST 2003, there are some - * significant differences: - * - * 1. The Megiddo and Modha model assumes any page is evictable. - * Pages in its cache cannot be "locked" into memory. This makes - * the eviction algorithm simple: evict the last page in the list. - * This also make the performance characteristics easy to reason - * about. Our cache is not so simple. At any given moment, some - * subset of the blocks in the cache are un-evictable because we - * have handed out a reference to them. Blocks are only evictable - * when there are no external references active. This makes - * eviction far more problematic: we choose to evict the evictable - * blocks that are the "lowest" in the list. - * - * There are times when it is not possible to evict the requested - * space. In these circumstances we are unable to adjust the cache - * size. 
To prevent the cache growing unbounded at these times we - * implement a "cache throttle" that slows the flow of new data - * into the cache until we can make space available. - * - * 2. The Megiddo and Modha model assumes a fixed cache size. - * Pages are evicted when the cache is full and there is a cache - * miss. Our model has a variable sized cache. It grows with - * high use, but also tries to react to memory pressure from the - * operating system: decreasing its size when system memory is - * tight. - * - * 3. The Megiddo and Modha model assumes a fixed page size. All - * elements of the cache are therefore exactly the same size. So - * when adjusting the cache size following a cache miss, its simply - * a matter of choosing a single page to evict. In our model, we - * have variable sized cache blocks (rangeing from 512 bytes to - * 128K bytes). We therefore choose a set of blocks to evict to make - * space for a cache miss that approximates as closely as possible - * the space used by the new block. - * - * See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache" - * by N. Megiddo & D. Modha, FAST 2003 - */ - -/* - * The locking model: - * - * A new reference to a cache buffer can be obtained in two - * ways: 1) via a hash table lookup using the DVA as a key, - * or 2) via one of the ARC lists. The arc_read() interface - * uses method 1, while the internal ARC algorithms for - * adjusting the cache use method 2. We therefore provide two - * types of locks: 1) the hash table lock array, and 2) the - * ARC list locks. - * - * Buffers do not have their own mutexes, rather they rely on the - * hash table mutexes for the bulk of their protection (i.e. most - * fields in the arc_buf_hdr_t are protected by these mutexes). - * - * buf_hash_find() returns the appropriate mutex (held) when it - * locates the requested buffer in the hash table. It returns - * NULL for the mutex if the buffer was not in the table. 
- * - * buf_hash_remove() expects the appropriate hash mutex to be - * already held before it is invoked. - * - * Each ARC state also has a mutex which is used to protect the - * buffer list associated with the state. When attempting to - * obtain a hash table lock while holding an ARC list lock you - * must use: mutex_tryenter() to avoid deadlock. Also note that - * the active state mutex must be held before the ghost state mutex. - * - * It as also possible to register a callback which is run when the - * arc_meta_limit is reached and no buffers can be safely evicted. In - * this case the arc user should drop a reference on some arc buffers so - * they can be reclaimed and the arc_meta_limit honored. For example, - * when using the ZPL each dentry holds a references on a znode. These - * dentries must be pruned before the arc buffer holding the znode can - * be safely evicted. - * - * Note that the majority of the performance stats are manipulated - * with atomic operations. - * - * The L2ARC uses the l2ad_mtx on each vdev for the following: - * - * - L2ARC buflist creation - * - L2ARC buflist eviction - * - L2ARC write completion, which walks L2ARC buflists - * - ARC header destruction, as it removes from L2ARC buflists - * - ARC header release, as it removes from L2ARC buflists - */ - -/* - * ARC operation: - * - * Every block that is in the ARC is tracked by an arc_buf_hdr_t structure. - * This structure can point either to a block that is still in the cache or to - * one that is only accessible in an L2 ARC device, or it can provide - * information about a block that was recently evicted. If a block is - * only accessible in the L2ARC, then the arc_buf_hdr_t only has enough - * information to retrieve it from the L2ARC device. This information is - * stored in the l2arc_buf_hdr_t sub-structure of the arc_buf_hdr_t. A block - * that is in this state cannot access the data directly. 
- * - * Blocks that are actively being referenced or have not been evicted - * are cached in the L1ARC. The L1ARC (l1arc_buf_hdr_t) is a structure within - * the arc_buf_hdr_t that will point to the data block in memory. A block can - * only be read by a consumer if it has an l1arc_buf_hdr_t. The L1ARC - * caches data in two ways -- in a list of ARC buffers (arc_buf_t) and - * also in the arc_buf_hdr_t's private physical data block pointer (b_pabd). - * - * The L1ARC's data pointer may or may not be uncompressed. The ARC has the - * ability to store the physical data (b_pabd) associated with the DVA of the - * arc_buf_hdr_t. Since the b_pabd is a copy of the on-disk physical block, - * it will match its on-disk compression characteristics. This behavior can be - * disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the - * compressed ARC functionality is disabled, the b_pabd will point to an - * uncompressed version of the on-disk data. - * - * Data in the L1ARC is not accessed by consumers of the ARC directly. Each - * arc_buf_hdr_t can have multiple ARC buffers (arc_buf_t) which reference it. - * Each ARC buffer (arc_buf_t) is being actively accessed by a specific ARC - * consumer. The ARC will provide references to this data and will keep it - * cached until it is no longer in use. The ARC caches only the L1ARC's physical - * data block and will evict any arc_buf_t that is no longer referenced. The - * amount of memory consumed by the arc_buf_ts' data buffers can be seen via the - * "overhead_size" kstat. - * - * Depending on the consumer, an arc_buf_t can be requested in uncompressed or - * compressed form. The typical case is that consumers will want uncompressed - * data, and when that happens a new data buffer is allocated where the data is - * decompressed for them to use. Currently the only consumer who wants - * compressed arc_buf_t's is "zfs send", when it streams data exactly as it - * exists on disk. 
When this happens, the arc_buf_t's data buffer is shared - * with the arc_buf_hdr_t. - * - * Here is a diagram showing an arc_buf_hdr_t referenced by two arc_buf_t's. The - * first one is owned by a compressed send consumer (and therefore references - * the same compressed data buffer as the arc_buf_hdr_t) and the second could be - * used by any other consumer (and has its own uncompressed copy of the data - * buffer). - * - * arc_buf_hdr_t - * +-----------+ - * | fields | - * | common to | - * | L1- and | - * | L2ARC | - * +-----------+ - * | l2arc_buf_hdr_t - * | | - * +-----------+ - * | l1arc_buf_hdr_t - * | | arc_buf_t - * | b_buf +------------>+-----------+ arc_buf_t - * | b_pabd +-+ |b_next +---->+-----------+ - * +-----------+ | |-----------| |b_next +-->NULL - * | |b_comp = T | +-----------+ - * | |b_data +-+ |b_comp = F | - * | +-----------+ | |b_data +-+ - * +->+------+ | +-----------+ | - * compressed | | | | - * data | |<--------------+ | uncompressed - * +------+ compressed, | data - * shared +-->+------+ - * data | | - * | | - * +------+ - * - * When a consumer reads a block, the ARC must first look to see if the - * arc_buf_hdr_t is cached. If the hdr is cached then the ARC allocates a new - * arc_buf_t and either copies uncompressed data into a new data buffer from an - * existing uncompressed arc_buf_t, decompresses the hdr's b_pabd buffer into a - * new data buffer, or shares the hdr's b_pabd buffer, depending on whether the - * hdr is compressed and the desired compression characteristics of the - * arc_buf_t consumer. If the arc_buf_t ends up sharing data with the - * arc_buf_hdr_t and both of them are uncompressed then the arc_buf_t must be - * the last buffer in the hdr's b_buf list, however a shared compressed buf can - * be anywhere in the hdr's list. 
- * - * The diagram below shows an example of an uncompressed ARC hdr that is - * sharing its data with an arc_buf_t (note that the shared uncompressed buf is - * the last element in the buf list): - * - * arc_buf_hdr_t - * +-----------+ - * | | - * | | - * | | - * +-----------+ - * l2arc_buf_hdr_t| | - * | | - * +-----------+ - * l1arc_buf_hdr_t| | - * | | arc_buf_t (shared) - * | b_buf +------------>+---------+ arc_buf_t - * | | |b_next +---->+---------+ - * | b_pabd +-+ |---------| |b_next +-->NULL - * +-----------+ | | | +---------+ - * | |b_data +-+ | | - * | +---------+ | |b_data +-+ - * +->+------+ | +---------+ | - * | | | | - * uncompressed | | | | - * data +------+ | | - * ^ +->+------+ | - * | uncompressed | | | - * | data | | | - * | +------+ | - * +---------------------------------+ - * - * Writing to the ARC requires that the ARC first discard the hdr's b_pabd - * since the physical block is about to be rewritten. The new data contents - * will be contained in the arc_buf_t. As the I/O pipeline performs the write, - * it may compress the data before writing it to disk. The ARC will be called - * with the transformed data and will bcopy the transformed on-disk block into - * a newly allocated b_pabd. Writes are always done into buffers which have - * either been loaned (and hence are new and don't have other readers) or - * buffers which have been released (and hence have their own hdr, if there - * were originally other readers of the buf's original hdr). This ensures that - * the ARC only needs to update a single buf and its hdr after a write occurs. - * - * When the L2ARC is in use, it will also take advantage of the b_pabd. The - * L2ARC will always write the contents of b_pabd to the L2ARC. This means - * that when compressed ARC is enabled that the L2ARC blocks are identical - * to the on-disk block in the main data pool. 
This provides a significant - * advantage since the ARC can leverage the bp's checksum when reading from the - * L2ARC to determine if the contents are valid. However, if the compressed - * ARC is disabled, then the L2ARC's block must be transformed to look - * like the physical block in the main data pool before comparing the - * checksum and determining its validity. - */ - -#include <sys/spa.h> -#include <sys/zio.h> -#include <sys/spa_impl.h> -#include <sys/zio_compress.h> -#include <sys/zio_checksum.h> -#include <sys/zfs_context.h> -#include <sys/arc.h> -#include <sys/refcount.h> -#include <sys/vdev.h> -#include <sys/vdev_impl.h> -#include <sys/dsl_pool.h> -#include <sys/zio_checksum.h> -#include <sys/multilist.h> -#include <sys/abd.h> -#ifdef _KERNEL -#include <sys/dnlc.h> -#include <sys/racct.h> -#endif -#include <sys/callb.h> -#include <sys/kstat.h> -#include <sys/trim_map.h> -#include <sys/zthr.h> -#include <zfs_fletcher.h> -#include <sys/sdt.h> -#include <sys/aggsum.h> -#include <sys/cityhash.h> - -#include <machine/vmparam.h> - -#ifdef illumos -#ifndef _KERNEL -/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */ -boolean_t arc_watch = B_FALSE; -int arc_procfd; -#endif -#endif /* illumos */ - -/* - * This thread's job is to keep enough free memory in the system, by - * calling arc_kmem_reap_now() plus arc_shrink(), which improves - * arc_available_memory(). - */ -static zthr_t *arc_reap_zthr; - -/* - * This thread's job is to keep arc_size under arc_c, by calling - * arc_adjust(), which improves arc_is_overflowing(). 
- */ -static zthr_t *arc_adjust_zthr; - -static kmutex_t arc_adjust_lock; -static kcondvar_t arc_adjust_waiters_cv; -static boolean_t arc_adjust_needed = B_FALSE; - -static kmutex_t arc_dnlc_evicts_lock; -static kcondvar_t arc_dnlc_evicts_cv; -static boolean_t arc_dnlc_evicts_thread_exit; - -uint_t arc_reduce_dnlc_percent = 3; - -/* - * The number of headers to evict in arc_evict_state_impl() before - * dropping the sublist lock and evicting from another sublist. A lower - * value means we're more likely to evict the "correct" header (i.e. the - * oldest header in the arc state), but comes with higher overhead - * (i.e. more invocations of arc_evict_state_impl()). - */ -int zfs_arc_evict_batch_limit = 10; - -/* number of seconds before growing cache again */ -int arc_grow_retry = 60; - -/* - * Minimum time between calls to arc_kmem_reap_soon(). Note that this will - * be converted to ticks, so with the default hz=100, a setting of 15 ms - * will actually wait 2 ticks, or 20ms. - */ -int arc_kmem_cache_reap_retry_ms = 1000; - -/* shift of arc_c for calculating overflow limit in arc_get_data_impl */ -int zfs_arc_overflow_shift = 8; - -/* shift of arc_c for calculating both min and max arc_p */ -int arc_p_min_shift = 4; - -/* log2(fraction of arc to reclaim) */ -int arc_shrink_shift = 7; - -/* - * log2(fraction of ARC which must be free to allow growing). - * I.e. If there is less than arc_c >> arc_no_grow_shift free memory, - * when reading a new block into the ARC, we will evict an equal-sized block - * from the ARC. - * - * This must be less than arc_shrink_shift, so that when we shrink the ARC, - * we will still not allow it to grow. - */ -int arc_no_grow_shift = 5; - - -/* - * minimum lifespan of a prefetch block in clock ticks - * (initialized in arc_init()) - */ -static int zfs_arc_min_prefetch_ms = 1; -static int zfs_arc_min_prescient_prefetch_ms = 6; - -/* - * If this percent of memory is free, don't throttle. 
- */ -int arc_lotsfree_percent = 10; - -static boolean_t arc_initialized; -extern boolean_t zfs_prefetch_disable; - -/* - * The arc has filled available memory and has now warmed up. - */ -static boolean_t arc_warm; - -/* - * log2 fraction of the zio arena to keep free. - */ -int arc_zio_arena_free_shift = 2; - -/* - * These tunables are for performance analysis. - */ -uint64_t zfs_arc_max; -uint64_t zfs_arc_min; -uint64_t zfs_arc_meta_limit = 0; -uint64_t zfs_arc_meta_min = 0; -uint64_t zfs_arc_dnode_limit = 0; -uint64_t zfs_arc_dnode_reduce_percent = 10; -int zfs_arc_grow_retry = 0; -int zfs_arc_shrink_shift = 0; -int zfs_arc_no_grow_shift = 0; -int zfs_arc_p_min_shift = 0; -uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ -u_int zfs_arc_free_target = 0; - -/* Absolute min for arc min / max is 16MB. */ -static uint64_t arc_abs_min = 16 << 20; - -/* - * ARC dirty data constraints for arc_tempreserve_space() throttle - */ -uint_t zfs_arc_dirty_limit_percent = 50; /* total dirty data limit */ -uint_t zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */ -uint_t zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */ - -boolean_t zfs_compressed_arc_enabled = B_TRUE; - -static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); -static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); -static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); -static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); -static int sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS); - -#if defined(__FreeBSD__) && defined(_KERNEL) -static void -arc_free_target_init(void *unused __unused) -{ - - zfs_arc_free_target = vm_cnt.v_free_target; -} -SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, - arc_free_target_init, NULL); - -TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); -TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min); -TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift); 
-TUNABLE_INT("vfs.zfs.arc_grow_retry", &zfs_arc_grow_retry); -TUNABLE_INT("vfs.zfs.arc_no_grow_shift", &zfs_arc_no_grow_shift); -SYSCTL_DECL(_vfs_zfs); -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, - CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, - 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_max, "QU", "Maximum ARC size"); -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, - CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, - 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_min, "QU", "Minimum ARC size"); -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, - CTLTYPE_U32 | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, - 0, sizeof(uint32_t), sysctl_vfs_zfs_arc_no_grow_shift, "U", - "log2(fraction of ARC which must be free to allow growing)"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, - &zfs_arc_average_blocksize, 0, - "ARC average blocksize"); -SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_shrink_shift, CTLFLAG_RW, - &arc_shrink_shift, 0, - "log2(fraction of arc to reclaim)"); -SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_grow_retry, CTLFLAG_RW, - &arc_grow_retry, 0, - "Wait in seconds before considering growing ARC"); -SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RDTUN, - &zfs_compressed_arc_enabled, 0, - "Enable compressed ARC"); -SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_kmem_cache_reap_retry_ms, CTLFLAG_RWTUN, - &arc_kmem_cache_reap_retry_ms, 0, - "Interval between ARC kmem_cache reapings"); - -/* - * We don't have a tunable for arc_free_target due to the dependency on - * pagedaemon initialisation. 
- */ -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, - CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), - sysctl_vfs_zfs_arc_free_target, "IU", - "Desired number of free pages below which ARC triggers reclaim"); - -static int -sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) -{ - u_int val; - int err; - - val = zfs_arc_free_target; - err = sysctl_handle_int(oidp, &val, 0, req); - if (err != 0 || req->newptr == NULL) - return (err); - - if (val < minfree) - return (EINVAL); - if (val > vm_cnt.v_page_count) - return (EINVAL); - - zfs_arc_free_target = val; - - return (0); -} - -/* - * Must be declared here, before the definition of the corresponding kstat - * macro: the macro uses the same name and would otherwise confuse the - * compiler. - */ -SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_meta_limit, - CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), - sysctl_vfs_zfs_arc_meta_limit, "QU", - "ARC metadata limit"); -#endif - -/* - * Note that buffers can be in one of 6 states: - * ARC_anon - anonymous (discussed below) - * ARC_mru - recently used, currently cached - * ARC_mru_ghost - recently used, no longer in cache - * ARC_mfu - frequently used, currently cached - * ARC_mfu_ghost - frequently used, no longer in cache - * ARC_l2c_only - exists in L2ARC but not other states - * When there are no active references to the buffer, they - * are linked onto a list in one of these arc states. These are - * the only buffers that can be evicted or deleted. Within each - * state there are multiple lists, one for meta-data and one for - * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, - * etc.) is tracked separately so that it can be managed more - * explicitly: favored over data, limited explicitly. - * - * Anonymous buffers are buffers that are not associated with - * a DVA. These are buffers that hold dirty block copies - * before they are written to stable storage.
By definition, - * they are "ref'd" and are considered part of arc_mru - * that cannot be freed. Generally, they will acquire a DVA - * as they are written and migrate onto the arc_mru list. - * - * The ARC_l2c_only state is for buffers that are in the second - * level ARC but no longer in any of the ARC_m* lists. The second - * level ARC itself may also contain buffers that are in any of - * the ARC_m* states - meaning that a buffer can exist in two - * places. The reason for the ARC_l2c_only state is to keep the - * buffer header in the hash table, so that reads that hit the - * second level ARC benefit from these fast lookups. - */ - -typedef struct arc_state { - /* - * list of evictable buffers - */ - multilist_t *arcs_list[ARC_BUFC_NUMTYPES]; - /* - * total amount of evictable data in this state - */ - zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES]; - /* - * total amount of data in this state; this includes: evictable, - * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. - */ - zfs_refcount_t arcs_size; - /* - * supports the "dbufs" kstat - */ - arc_state_type_t arcs_state; -} arc_state_t; - -/* - * Percentage that can be consumed by dnodes of ARC meta buffers.
- */ -int zfs_arc_meta_prune = 10000; -unsigned long zfs_arc_dnode_limit_percent = 10; -int zfs_arc_meta_strategy = ARC_STRATEGY_META_ONLY; -int zfs_arc_meta_adjust_restarts = 4096; - -SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_meta_strategy, CTLFLAG_RWTUN, - &zfs_arc_meta_strategy, 0, - "ARC metadata reclamation strategy " - "(0 = metadata only, 1 = balance data and metadata)"); - -/* The 6 states: */ -static arc_state_t ARC_anon; -static arc_state_t ARC_mru; -static arc_state_t ARC_mru_ghost; -static arc_state_t ARC_mfu; -static arc_state_t ARC_mfu_ghost; -static arc_state_t ARC_l2c_only; - -typedef struct arc_stats { - kstat_named_t arcstat_hits; - kstat_named_t arcstat_misses; - kstat_named_t arcstat_demand_data_hits; - kstat_named_t arcstat_demand_data_misses; - kstat_named_t arcstat_demand_metadata_hits; - kstat_named_t arcstat_demand_metadata_misses; - kstat_named_t arcstat_prefetch_data_hits; - kstat_named_t arcstat_prefetch_data_misses; - kstat_named_t arcstat_prefetch_metadata_hits; - kstat_named_t arcstat_prefetch_metadata_misses; - kstat_named_t arcstat_mru_hits; - kstat_named_t arcstat_mru_ghost_hits; - kstat_named_t arcstat_mfu_hits; - kstat_named_t arcstat_mfu_ghost_hits; - kstat_named_t arcstat_allocated; - kstat_named_t arcstat_deleted; - /* - * Number of buffers that could not be evicted because the hash lock - * was held by another thread. The lock may not necessarily be held - * by something using the same buffer, since hash locks are shared - * by multiple buffers. - */ - kstat_named_t arcstat_mutex_miss; - /* - * Number of buffers skipped when updating the access state due to the - * header having already been released after acquiring the hash lock. - */ - kstat_named_t arcstat_access_skip; - /* - * Number of buffers skipped because they have I/O in progress, are - * indirect prefetch buffers that have not lived long enough, or are - * not from the spa we're trying to evict from. 
- */ - kstat_named_t arcstat_evict_skip; - /* - * Number of times arc_evict_state() was unable to evict enough - * buffers to reach its target amount. - */ - kstat_named_t arcstat_evict_not_enough; - kstat_named_t arcstat_evict_l2_cached; - kstat_named_t arcstat_evict_l2_eligible; - kstat_named_t arcstat_evict_l2_ineligible; - kstat_named_t arcstat_evict_l2_skip; - kstat_named_t arcstat_hash_elements; - kstat_named_t arcstat_hash_elements_max; - kstat_named_t arcstat_hash_collisions; - kstat_named_t arcstat_hash_chains; - kstat_named_t arcstat_hash_chain_max; - kstat_named_t arcstat_p; - kstat_named_t arcstat_c; - kstat_named_t arcstat_c_min; - kstat_named_t arcstat_c_max; - /* Not updated directly; only synced in arc_kstat_update. */ - kstat_named_t arcstat_size; - /* - * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. - * Note that the compressed bytes may match the uncompressed bytes - * if the block is either not compressed or compressed arc is disabled. - */ - kstat_named_t arcstat_compressed_size; - /* - * Uncompressed size of the data stored in b_pabd. If compressed - * arc is disabled then this value will be identical to the stat - * above. - */ - kstat_named_t arcstat_uncompressed_size; - /* - * Number of bytes stored in all the arc_buf_t's. This is classified - * as "overhead" since this data is typically short-lived and will - * be evicted from the arc when it becomes unreferenced unless the - * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level - * values have been set (see comment in dbuf.c for more information). - */ - kstat_named_t arcstat_overhead_size; - /* - * Number of bytes consumed by internal ARC structures necessary - * for tracking purposes; these structures are not actually - * backed by ARC buffers. This includes arc_buf_hdr_t structures - * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only - * caches), and arc_buf_t structures (allocated via arc_buf_t - * cache).
- * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_hdr_size; - /* - * Number of bytes consumed by ARC buffers of type equal to - * ARC_BUFC_DATA. This is generally consumed by buffers backing - * on disk user data (e.g. plain file contents). - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_data_size; - /* - * Number of bytes consumed by ARC buffers of type equal to - * ARC_BUFC_METADATA. This is generally consumed by buffers - * backing on disk data that is used for internal ZFS - * structures (e.g. ZAP, dnode, indirect blocks, etc). - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_metadata_size; - /* - * Number of bytes consumed by dmu_buf_impl_t objects. - */ - kstat_named_t arcstat_dbuf_size; - /* - * Number of bytes consumed by dnode_t objects. - */ - kstat_named_t arcstat_dnode_size; - /* - * Number of bytes consumed by bonus buffers. - */ - kstat_named_t arcstat_bonus_size; -#if defined(__FreeBSD__) && defined(COMPAT_FREEBSD11) - /* - * Sum of the previous three counters, provided for compatibility. - */ - kstat_named_t arcstat_other_size; -#endif - /* - * Total number of bytes consumed by ARC buffers residing in the - * arc_anon state. This includes *all* buffers in the arc_anon - * state; e.g. data, metadata, evictable, and unevictable buffers - * are all included in this value. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_anon_size; - /* - * Number of bytes consumed by ARC buffers that meet the - * following criteria: backing buffers of type ARC_BUFC_DATA, - * residing in the arc_anon state, and are eligible for eviction - * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. 
- */ - kstat_named_t arcstat_anon_evictable_data; - /* - * Number of bytes consumed by ARC buffers that meet the - * following criteria: backing buffers of type ARC_BUFC_METADATA, - * residing in the arc_anon state, and are eligible for eviction - * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_anon_evictable_metadata; - /* - * Total number of bytes consumed by ARC buffers residing in the - * arc_mru state. This includes *all* buffers in the arc_mru - * state; e.g. data, metadata, evictable, and unevictable buffers - * are all included in this value. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mru_size; - /* - * Number of bytes consumed by ARC buffers that meet the - * following criteria: backing buffers of type ARC_BUFC_DATA, - * residing in the arc_mru state, and are eligible for eviction - * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mru_evictable_data; - /* - * Number of bytes consumed by ARC buffers that meet the - * following criteria: backing buffers of type ARC_BUFC_METADATA, - * residing in the arc_mru state, and are eligible for eviction - * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mru_evictable_metadata; - /* - * Total number of bytes that *would have been* consumed by ARC - * buffers in the arc_mru_ghost state. The key thing to note - * here, is the fact that this size doesn't actually indicate - * RAM consumption. The ghost lists only consist of headers and - * don't actually have ARC buffers linked off of these headers. - * Thus, *if* the headers had associated ARC buffers, these - * buffers *would have* consumed this number of bytes. - * Not updated directly; only synced in arc_kstat_update. 
- */ - kstat_named_t arcstat_mru_ghost_size; - /* - * Number of bytes that *would have been* consumed by ARC - * buffers that are eligible for eviction, of type - * ARC_BUFC_DATA, and linked off the arc_mru_ghost state. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mru_ghost_evictable_data; - /* - * Number of bytes that *would have been* consumed by ARC - * buffers that are eligible for eviction, of type - * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mru_ghost_evictable_metadata; - /* - * Total number of bytes consumed by ARC buffers residing in the - * arc_mfu state. This includes *all* buffers in the arc_mfu - * state; e.g. data, metadata, evictable, and unevictable buffers - * are all included in this value. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mfu_size; - /* - * Number of bytes consumed by ARC buffers that are eligible for - * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu - * state. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mfu_evictable_data; - /* - * Number of bytes consumed by ARC buffers that are eligible for - * eviction, of type ARC_BUFC_METADATA, and reside in the - * arc_mfu state. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mfu_evictable_metadata; - /* - * Total number of bytes that *would have been* consumed by ARC - * buffers in the arc_mfu_ghost state. See the comment above - * arcstat_mru_ghost_size for more details. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mfu_ghost_size; - /* - * Number of bytes that *would have been* consumed by ARC - * buffers that are eligible for eviction, of type - * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. - * Not updated directly; only synced in arc_kstat_update. 
- */ - kstat_named_t arcstat_mfu_ghost_evictable_data; - /* - * Number of bytes that *would have been* consumed by ARC - * buffers that are eligible for eviction, of type - * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state. - * Not updated directly; only synced in arc_kstat_update. - */ - kstat_named_t arcstat_mfu_ghost_evictable_metadata; - kstat_named_t arcstat_l2_hits; - kstat_named_t arcstat_l2_misses; - kstat_named_t arcstat_l2_feeds; - kstat_named_t arcstat_l2_rw_clash; - kstat_named_t arcstat_l2_read_bytes; - kstat_named_t arcstat_l2_write_bytes; - kstat_named_t arcstat_l2_writes_sent; - kstat_named_t arcstat_l2_writes_done; - kstat_named_t arcstat_l2_writes_error; - kstat_named_t arcstat_l2_writes_lock_retry; - kstat_named_t arcstat_l2_evict_lock_retry; - kstat_named_t arcstat_l2_evict_reading; - kstat_named_t arcstat_l2_evict_l1cached; - kstat_named_t arcstat_l2_free_on_write; - kstat_named_t arcstat_l2_abort_lowmem; - kstat_named_t arcstat_l2_cksum_bad; - kstat_named_t arcstat_l2_io_error; - kstat_named_t arcstat_l2_lsize; - kstat_named_t arcstat_l2_psize; - /* Not updated directly; only synced in arc_kstat_update.
*/ - kstat_named_t arcstat_l2_hdr_size; - kstat_named_t arcstat_l2_write_trylock_fail; - kstat_named_t arcstat_l2_write_passed_headroom; - kstat_named_t arcstat_l2_write_spa_mismatch; - kstat_named_t arcstat_l2_write_in_l2; - kstat_named_t arcstat_l2_write_hdr_io_in_progress; - kstat_named_t arcstat_l2_write_not_cacheable; - kstat_named_t arcstat_l2_write_full; - kstat_named_t arcstat_l2_write_buffer_iter; - kstat_named_t arcstat_l2_write_pios; - kstat_named_t arcstat_l2_write_buffer_bytes_scanned; - kstat_named_t arcstat_l2_write_buffer_list_iter; - kstat_named_t arcstat_l2_write_buffer_list_null_iter; - kstat_named_t arcstat_memory_throttle_count; - kstat_named_t arcstat_memory_direct_count; - kstat_named_t arcstat_memory_indirect_count; - kstat_named_t arcstat_memory_all_bytes; - kstat_named_t arcstat_memory_free_bytes; - kstat_named_t arcstat_memory_available_bytes; - kstat_named_t arcstat_no_grow; - kstat_named_t arcstat_tempreserve; - kstat_named_t arcstat_loaned_bytes; - kstat_named_t arcstat_prune; - /* Not updated directly; only synced in arc_kstat_update. 
*/ - kstat_named_t arcstat_meta_used; - kstat_named_t arcstat_meta_limit; - kstat_named_t arcstat_dnode_limit; - kstat_named_t arcstat_meta_max; - kstat_named_t arcstat_meta_min; - kstat_named_t arcstat_async_upgrade_sync; - kstat_named_t arcstat_demand_hit_predictive_prefetch; - kstat_named_t arcstat_demand_hit_prescient_prefetch; -} arc_stats_t; - -static arc_stats_t arc_stats = { - { "hits", KSTAT_DATA_UINT64 }, - { "misses", KSTAT_DATA_UINT64 }, - { "demand_data_hits", KSTAT_DATA_UINT64 }, - { "demand_data_misses", KSTAT_DATA_UINT64 }, - { "demand_metadata_hits", KSTAT_DATA_UINT64 }, - { "demand_metadata_misses", KSTAT_DATA_UINT64 }, - { "prefetch_data_hits", KSTAT_DATA_UINT64 }, - { "prefetch_data_misses", KSTAT_DATA_UINT64 }, - { "prefetch_metadata_hits", KSTAT_DATA_UINT64 }, - { "prefetch_metadata_misses", KSTAT_DATA_UINT64 }, - { "mru_hits", KSTAT_DATA_UINT64 }, - { "mru_ghost_hits", KSTAT_DATA_UINT64 }, - { "mfu_hits", KSTAT_DATA_UINT64 }, - { "mfu_ghost_hits", KSTAT_DATA_UINT64 }, - { "allocated", KSTAT_DATA_UINT64 }, - { "deleted", KSTAT_DATA_UINT64 }, - { "mutex_miss", KSTAT_DATA_UINT64 }, - { "access_skip", KSTAT_DATA_UINT64 }, - { "evict_skip", KSTAT_DATA_UINT64 }, - { "evict_not_enough", KSTAT_DATA_UINT64 }, - { "evict_l2_cached", KSTAT_DATA_UINT64 }, - { "evict_l2_eligible", KSTAT_DATA_UINT64 }, - { "evict_l2_ineligible", KSTAT_DATA_UINT64 }, - { "evict_l2_skip", KSTAT_DATA_UINT64 }, - { "hash_elements", KSTAT_DATA_UINT64 }, - { "hash_elements_max", KSTAT_DATA_UINT64 }, - { "hash_collisions", KSTAT_DATA_UINT64 }, - { "hash_chains", KSTAT_DATA_UINT64 }, - { "hash_chain_max", KSTAT_DATA_UINT64 }, - { "p", KSTAT_DATA_UINT64 }, - { "c", KSTAT_DATA_UINT64 }, - { "c_min", KSTAT_DATA_UINT64 }, - { "c_max", KSTAT_DATA_UINT64 }, - { "size", KSTAT_DATA_UINT64 }, - { "compressed_size", KSTAT_DATA_UINT64 }, - { "uncompressed_size", KSTAT_DATA_UINT64 }, - { "overhead_size", KSTAT_DATA_UINT64 }, - { "hdr_size", KSTAT_DATA_UINT64 }, - { "data_size", 
KSTAT_DATA_UINT64 }, - { "metadata_size", KSTAT_DATA_UINT64 }, - { "dbuf_size", KSTAT_DATA_UINT64 }, - { "dnode_size", KSTAT_DATA_UINT64 }, - { "bonus_size", KSTAT_DATA_UINT64 }, -#if defined(__FreeBSD__) && defined(COMPAT_FREEBSD11) - { "other_size", KSTAT_DATA_UINT64 }, -#endif - { "anon_size", KSTAT_DATA_UINT64 }, - { "anon_evictable_data", KSTAT_DATA_UINT64 }, - { "anon_evictable_metadata", KSTAT_DATA_UINT64 }, - { "mru_size", KSTAT_DATA_UINT64 }, - { "mru_evictable_data", KSTAT_DATA_UINT64 }, - { "mru_evictable_metadata", KSTAT_DATA_UINT64 }, - { "mru_ghost_size", KSTAT_DATA_UINT64 }, - { "mru_ghost_evictable_data", KSTAT_DATA_UINT64 }, - { "mru_ghost_evictable_metadata", KSTAT_DATA_UINT64 }, - { "mfu_size", KSTAT_DATA_UINT64 }, - { "mfu_evictable_data", KSTAT_DATA_UINT64 }, - { "mfu_evictable_metadata", KSTAT_DATA_UINT64 }, - { "mfu_ghost_size", KSTAT_DATA_UINT64 }, - { "mfu_ghost_evictable_data", KSTAT_DATA_UINT64 }, - { "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 }, - { "l2_hits", KSTAT_DATA_UINT64 }, - { "l2_misses", KSTAT_DATA_UINT64 }, - { "l2_feeds", KSTAT_DATA_UINT64 }, - { "l2_rw_clash", KSTAT_DATA_UINT64 }, - { "l2_read_bytes", KSTAT_DATA_UINT64 }, - { "l2_write_bytes", KSTAT_DATA_UINT64 }, - { "l2_writes_sent", KSTAT_DATA_UINT64 }, - { "l2_writes_done", KSTAT_DATA_UINT64 }, - { "l2_writes_error", KSTAT_DATA_UINT64 }, - { "l2_writes_lock_retry", KSTAT_DATA_UINT64 }, - { "l2_evict_lock_retry", KSTAT_DATA_UINT64 }, - { "l2_evict_reading", KSTAT_DATA_UINT64 }, - { "l2_evict_l1cached", KSTAT_DATA_UINT64 }, - { "l2_free_on_write", KSTAT_DATA_UINT64 }, - { "l2_abort_lowmem", KSTAT_DATA_UINT64 }, - { "l2_cksum_bad", KSTAT_DATA_UINT64 }, - { "l2_io_error", KSTAT_DATA_UINT64 }, - { "l2_size", KSTAT_DATA_UINT64 }, - { "l2_asize", KSTAT_DATA_UINT64 }, - { "l2_hdr_size", KSTAT_DATA_UINT64 }, - { "l2_write_trylock_fail", KSTAT_DATA_UINT64 }, - { "l2_write_passed_headroom", KSTAT_DATA_UINT64 }, - { "l2_write_spa_mismatch", KSTAT_DATA_UINT64 }, - { 
"l2_write_in_l2", KSTAT_DATA_UINT64 }, - { "l2_write_io_in_progress", KSTAT_DATA_UINT64 }, - { "l2_write_not_cacheable", KSTAT_DATA_UINT64 }, - { "l2_write_full", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_iter", KSTAT_DATA_UINT64 }, - { "l2_write_pios", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_bytes_scanned", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_list_iter", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 }, - { "memory_throttle_count", KSTAT_DATA_UINT64 }, - { "memory_direct_count", KSTAT_DATA_UINT64 }, - { "memory_indirect_count", KSTAT_DATA_UINT64 }, - { "memory_all_bytes", KSTAT_DATA_UINT64 }, - { "memory_free_bytes", KSTAT_DATA_UINT64 }, - { "memory_available_bytes", KSTAT_DATA_UINT64 }, - { "arc_no_grow", KSTAT_DATA_UINT64 }, - { "arc_tempreserve", KSTAT_DATA_UINT64 }, - { "arc_loaned_bytes", KSTAT_DATA_UINT64 }, - { "arc_prune", KSTAT_DATA_UINT64 }, - { "arc_meta_used", KSTAT_DATA_UINT64 }, - { "arc_meta_limit", KSTAT_DATA_UINT64 }, - { "arc_dnode_limit", KSTAT_DATA_UINT64 }, - { "arc_meta_max", KSTAT_DATA_UINT64 }, - { "arc_meta_min", KSTAT_DATA_UINT64 }, - { "async_upgrade_sync", KSTAT_DATA_UINT64 }, - { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 }, - { "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 }, -}; - -#define ARCSTAT(stat) (arc_stats.stat.value.ui64) - -#define ARCSTAT_INCR(stat, val) \ - atomic_add_64(&arc_stats.stat.value.ui64, (val)) - -#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) -#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) - -#define ARCSTAT_MAX(stat, val) { \ - uint64_t m; \ - while ((val) > (m = arc_stats.stat.value.ui64) && \ - (m != atomic_cas_64(&arc_stats.stat.value.ui64, m, (val)))) \ - continue; \ -} - -#define ARCSTAT_MAXSTAT(stat) \ - ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64) - -/* - * We define a macro to allow ARC hits/misses to be easily broken down by - * two separate conditions, giving a total of four different subtypes for - * each of hits and misses (so 
eight statistics total). - */ -#define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \ - if (cond1) { \ - if (cond2) { \ - ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \ - } else { \ - ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \ - } \ - } else { \ - if (cond2) { \ - ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \ - } else { \ - ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat);\ - } \ - } - -kstat_t *arc_ksp; -static arc_state_t *arc_anon; -static arc_state_t *arc_mru; -static arc_state_t *arc_mru_ghost; -static arc_state_t *arc_mfu; -static arc_state_t *arc_mfu_ghost; -static arc_state_t *arc_l2c_only; - -/* - * There are several ARC variables that are critical to export as kstats -- - * but we don't want to have to grovel around in the kstat whenever we wish to - * manipulate them. For these variables, we therefore define them to be in - * terms of the statistic variable. This assures that we are not introducing - * the possibility of inconsistency by having shadow copies of the variables, - * while still allowing the code to be readable. 
- */ -#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ -#define arc_c ARCSTAT(arcstat_c) /* target size of cache */ -#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ -#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ -#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */ -#define arc_dnode_limit ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */ -#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ -#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ -#define arc_dbuf_size ARCSTAT(arcstat_dbuf_size) /* dbuf metadata */ -#define arc_dnode_size ARCSTAT(arcstat_dnode_size) /* dnode metadata */ -#define arc_bonus_size ARCSTAT(arcstat_bonus_size) /* bonus buffer metadata */ - -/* compressed size of entire arc */ -#define arc_compressed_size ARCSTAT(arcstat_compressed_size) -/* uncompressed size of entire arc */ -#define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size) -/* number of bytes in the arc from arc_buf_t's */ -#define arc_overhead_size ARCSTAT(arcstat_overhead_size) - -/* - * There are also some ARC variables that we want to export, but that are - * updated so often that having the canonical representation be the statistic - * variable causes a performance bottleneck. We want to use aggsum_t's for these - * instead, but still be able to export the kstat in the same way as before. - * The solution is to always use the aggsum version, except in the kstat update - * callback. 
- */ -aggsum_t arc_size; -aggsum_t arc_meta_used; -aggsum_t astat_data_size; -aggsum_t astat_metadata_size; -aggsum_t astat_hdr_size; -aggsum_t astat_bonus_size; -aggsum_t astat_dnode_size; -aggsum_t astat_dbuf_size; -aggsum_t astat_l2_hdr_size; - -static list_t arc_prune_list; -static kmutex_t arc_prune_mtx; -static taskq_t *arc_prune_taskq; - -static int arc_no_grow; /* Don't try to grow cache size */ -static hrtime_t arc_growtime; -static uint64_t arc_tempreserve; -static uint64_t arc_loaned_bytes; - -typedef struct arc_callback arc_callback_t; - -struct arc_callback { - void *acb_private; - arc_read_done_func_t *acb_done; - arc_buf_t *acb_buf; - boolean_t acb_compressed; - zio_t *acb_zio_dummy; - zio_t *acb_zio_head; - arc_callback_t *acb_next; -}; - -typedef struct arc_write_callback arc_write_callback_t; - -struct arc_write_callback { - void *awcb_private; - arc_write_done_func_t *awcb_ready; - arc_write_done_func_t *awcb_children_ready; - arc_write_done_func_t *awcb_physdone; - arc_write_done_func_t *awcb_done; - arc_buf_t *awcb_buf; -}; - -/* - * ARC buffers are separated into multiple structs as a memory saving measure: - * - Common fields struct, always defined, and embedded within it: - * - L2-only fields, always allocated but undefined when not in L2ARC - * - L1-only fields, only allocated when in L1ARC - * - * Buffer in L1 Buffer only in L2 - * +------------------------+ +------------------------+ - * | arc_buf_hdr_t | | arc_buf_hdr_t | - * | | | | - * | | | | - * | | | | - * +------------------------+ +------------------------+ - * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t | - * | (undefined if L1-only) | | | - * +------------------------+ +------------------------+ - * | l1arc_buf_hdr_t | - * | | - * | | - * | | - * | | - * +------------------------+ - * - * Because it's possible for the L2ARC to become extremely large, we can wind - * up eating a lot of memory in L2ARC buffer headers, so the size of a header - * is minimized by only allocating the 
fields necessary for an L1-cached buffer - * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and - * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple - * words in pointers. arc_hdr_realloc() is used to switch a header between - * these two allocation states. - */ -typedef struct l1arc_buf_hdr { - kmutex_t b_freeze_lock; - zio_cksum_t *b_freeze_cksum; -#ifdef ZFS_DEBUG - /* - * Used for debugging with kmem_flags - by allocating and freeing - * b_thawed when the buffer is thawed, we get a record of the stack - * trace that thawed it. - */ - void *b_thawed; -#endif - - arc_buf_t *b_buf; - uint32_t b_bufcnt; - /* for waiting on writes to complete */ - kcondvar_t b_cv; - uint8_t b_byteswap; - - /* protected by arc state mutex */ - arc_state_t *b_state; - multilist_node_t b_arc_node; - - /* updated atomically */ - clock_t b_arc_access; - uint32_t b_mru_hits; - uint32_t b_mru_ghost_hits; - uint32_t b_mfu_hits; - uint32_t b_mfu_ghost_hits; - uint32_t b_l2_hits; - - /* self protecting */ - zfs_refcount_t b_refcnt; - - arc_callback_t *b_acb; - abd_t *b_pabd; -} l1arc_buf_hdr_t; - -typedef struct l2arc_dev l2arc_dev_t; - -typedef struct l2arc_buf_hdr { - /* protected by arc_buf_hdr mutex */ - l2arc_dev_t *b_dev; /* L2ARC device */ - uint64_t b_daddr; /* disk address, offset byte */ - uint32_t b_hits; - - list_node_t b_l2node; -} l2arc_buf_hdr_t; - -struct arc_buf_hdr { - /* protected by hash lock */ - dva_t b_dva; - uint64_t b_birth; - - arc_buf_contents_t b_type; - arc_buf_hdr_t *b_hash_next; - arc_flags_t b_flags; - - /* - * This field stores the size of the data buffer after - * compression, and is set in the arc's zio completion handlers. - * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes). - * - * While the block pointers can store up to 32MB in their psize - * field, we can only store up to 32MB minus 512B. This is due - * to the bp using a bias of 1, whereas we use a bias of 0 (i.e. 
- * a field of zeros represents 512B in the bp). We can't use a - * bias of 1 since we need to reserve a psize of zero, here, to - * represent holes and embedded blocks. - * - * This isn't a problem in practice, since the maximum size of a - * buffer is limited to 16MB, so we never need to store 32MB in - * this field. Even in the upstream illumos code base, the - * maximum size of a buffer is limited to 16MB. - */ - uint16_t b_psize; - - /* - * This field stores the size of the data buffer before - * compression, and cannot change once set. It is in units - * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes) - */ - uint16_t b_lsize; /* immutable */ - uint64_t b_spa; /* immutable */ - - /* L2ARC fields. Undefined when not in L2ARC. */ - l2arc_buf_hdr_t b_l2hdr; - /* L1ARC fields. Undefined when in l2arc_only state */ - l1arc_buf_hdr_t b_l1hdr; -}; - -#if defined(__FreeBSD__) && defined(_KERNEL) -static int -sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) -{ - uint64_t val; - int err; - - val = arc_meta_limit; - err = sysctl_handle_64(oidp, &val, 0, req); - if (err != 0 || req->newptr == NULL) - return (err); - - if (val <= 0 || val > arc_c_max) - return (EINVAL); - - arc_meta_limit = val; - - mutex_enter(&arc_adjust_lock); - arc_adjust_needed = B_TRUE; - mutex_exit(&arc_adjust_lock); - zthr_wakeup(arc_adjust_zthr); - - return (0); -} - -static int -sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) -{ - uint32_t val; - int err; - - val = arc_no_grow_shift; - err = sysctl_handle_32(oidp, &val, 0, req); - if (err != 0 || req->newptr == NULL) - return (err); - - if (val >= arc_shrink_shift) - return (EINVAL); - - arc_no_grow_shift = val; - return (0); -} - -static int -sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS) -{ - uint64_t val; - int err; - - val = zfs_arc_max; - err = sysctl_handle_64(oidp, &val, 0, req); - if (err != 0 || req->newptr == NULL) - return (err); - - if (zfs_arc_max == 0) { - /* Loader tunable so blindly set */ - zfs_arc_max = val; - return (0); - } - 
- if (val < arc_abs_min || val > kmem_size()) - return (EINVAL); - if (val < arc_c_min) - return (EINVAL); - if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit) - return (EINVAL); - - arc_c_max = val; - - arc_c = arc_c_max; - arc_p = (arc_c >> 1); - - if (zfs_arc_meta_limit == 0) { - /* limit meta-data to 1/4 of the arc capacity */ - arc_meta_limit = arc_c_max / 4; - } - - /* if kmem_flags are set, lets try to use less memory */ - if (kmem_debugging()) - arc_c = arc_c / 2; - - zfs_arc_max = arc_c; - - mutex_enter(&arc_adjust_lock); - arc_adjust_needed = B_TRUE; - mutex_exit(&arc_adjust_lock); - zthr_wakeup(arc_adjust_zthr); - - return (0); -} - -static int -sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS) -{ - uint64_t val; - int err; - - val = zfs_arc_min; - err = sysctl_handle_64(oidp, &val, 0, req); - if (err != 0 || req->newptr == NULL) - return (err); - - if (zfs_arc_min == 0) { - /* Loader tunable so blindly set */ - zfs_arc_min = val; - return (0); - } - - if (val < arc_abs_min || val > arc_c_max) - return (EINVAL); - - arc_c_min = val; - - if (zfs_arc_meta_min == 0) - arc_meta_min = arc_c_min / 2; - - if (arc_c < arc_c_min) - arc_c = arc_c_min; - - zfs_arc_min = arc_c_min; - - return (0); -} -#endif - -#define GHOST_STATE(state) \ - ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ - (state) == arc_l2c_only) - -#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_FLAG_IN_HASH_TABLE) -#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) -#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR) -#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH) -#define HDR_PRESCIENT_PREFETCH(hdr) \ - ((hdr)->b_flags & ARC_FLAG_PRESCIENT_PREFETCH) -#define HDR_COMPRESSION_ENABLED(hdr) \ - ((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC) - -#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE) -#define HDR_L2_READING(hdr) \ - (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \ - ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)) -#define 
/*
 * Store the compression mode `cmp` in the compression bit-field of the
 * header's b_flags (SPA_COMPRESSBITS wide, starting at HDR_COMPRESS_OFFSET).
 *
 * The expansion intentionally carries no trailing semicolon: the caller
 * supplies it ("HDR_SET_COMPRESS(hdr, cmp);"), so the macro acts as a single
 * statement and stays safe inside an unbraced if/else.
 */
#define	HDR_SET_COMPRESS(hdr, cmp) BF32_SET((hdr)->b_flags, \
	HDR_COMPRESS_OFFSET, SPA_COMPRESSBITS, (cmp))
&hdr->b_dva, hdr->b_birth))) - -uint64_t zfs_crc64_table[256]; - -/* - * Level 2 ARC - */ - -#define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */ -#define L2ARC_HEADROOM 2 /* num of writes */ -/* - * If we discover during ARC scan any buffers to be compressed, we boost - * our headroom for the next scanning cycle by this percentage multiple. - */ -#define L2ARC_HEADROOM_BOOST 200 -#define L2ARC_FEED_SECS 1 /* caching interval secs */ -#define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */ - -#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent) -#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done) - -/* L2ARC Performance Tunables */ -uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* default max write size */ -uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */ -uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */ -uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; -uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ -uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */ -boolean_t l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ -boolean_t l2arc_feed_again = B_TRUE; /* turbo warmup */ -boolean_t l2arc_norw = B_TRUE; /* no reads during writes */ - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RWTUN, - &l2arc_write_max, 0, "max write size"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RWTUN, - &l2arc_write_boost, 0, "extra write during warmup"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RWTUN, - &l2arc_headroom, 0, "number of dev writes"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RWTUN, - &l2arc_feed_secs, 0, "interval seconds"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RWTUN, - &l2arc_feed_min_ms, 0, "min interval milliseconds"); - -SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RWTUN, - &l2arc_noprefetch, 0, "don't cache prefetch bufs"); 
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RWTUN, - &l2arc_feed_again, 0, "turbo warmup"); -SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RWTUN, - &l2arc_norw, 0, "no reads during writes"); - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD, - &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD, - &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of anonymous state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD, - &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of anonymous state"); - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD, - &ARC_mru.arcs_size.rc_count, 0, "size of mru state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD, - &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of metadata in mru state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD, - &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of data in mru state"); - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD, - &ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD, - &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of metadata in mru ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD, - &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of data in mru ghost state"); - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD, - &ARC_mfu.arcs_size.rc_count, 0, "size of mfu state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD, - &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of metadata in mfu state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD, - &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of data in mfu state"); - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD, - 
&ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD, - &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, - "size of metadata in mfu ghost state"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD, - &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, - "size of data in mfu ghost state"); - -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD, - &ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state"); - -SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_min_prefetch_ms, CTLFLAG_RW, - &zfs_arc_min_prefetch_ms, 0, "Min life of prefetch block in ms"); -SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_min_prescient_prefetch_ms, CTLFLAG_RW, - &zfs_arc_min_prescient_prefetch_ms, 0, "Min life of prescient prefetched block in ms"); - -/* - * L2ARC Internals - */ -struct l2arc_dev { - vdev_t *l2ad_vdev; /* vdev */ - spa_t *l2ad_spa; /* spa */ - uint64_t l2ad_hand; /* next write location */ - uint64_t l2ad_start; /* first addr on device */ - uint64_t l2ad_end; /* last addr on device */ - boolean_t l2ad_first; /* first sweep through */ - boolean_t l2ad_writing; /* currently writing */ - kmutex_t l2ad_mtx; /* lock for buffer list */ - list_t l2ad_buflist; /* buffer list */ - list_node_t l2ad_node; /* device list node */ - zfs_refcount_t l2ad_alloc; /* allocated bytes */ -}; - -static list_t L2ARC_dev_list; /* device list */ -static list_t *l2arc_dev_list; /* device list pointer */ -static kmutex_t l2arc_dev_mtx; /* device list mutex */ -static l2arc_dev_t *l2arc_dev_last; /* last device used */ -static list_t L2ARC_free_on_write; /* free after write buf list */ -static list_t *l2arc_free_on_write; /* free after write list ptr */ -static kmutex_t l2arc_free_on_write_mtx; /* mutex for list */ -static uint64_t l2arc_ndev; /* number of devices */ - -typedef struct l2arc_read_callback { - arc_buf_hdr_t *l2rcb_hdr; /* read header */ - blkptr_t l2rcb_bp; /* original blkptr */ - 
zbookmark_phys_t l2rcb_zb; /* original bookmark */ - int l2rcb_flags; /* original flags */ - abd_t *l2rcb_abd; /* temporary buffer */ -} l2arc_read_callback_t; - -typedef struct l2arc_write_callback { - l2arc_dev_t *l2wcb_dev; /* device info */ - arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ -} l2arc_write_callback_t; - -typedef struct l2arc_data_free { - /* protected by l2arc_free_on_write_mtx */ - abd_t *l2df_abd; - size_t l2df_size; - arc_buf_contents_t l2df_type; - list_node_t l2df_list_node; -} l2arc_data_free_t; - -static kmutex_t l2arc_feed_thr_lock; -static kcondvar_t l2arc_feed_thr_cv; -static uint8_t l2arc_thread_exit; - -static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t); -static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); -static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t); -static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); -static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); -static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); -static void arc_hdr_free_pabd(arc_buf_hdr_t *); -static void arc_hdr_alloc_pabd(arc_buf_hdr_t *, boolean_t); -static void arc_access(arc_buf_hdr_t *, kmutex_t *); -static boolean_t arc_is_overflowing(); -static void arc_buf_watch(arc_buf_t *); -static void arc_prune_async(int64_t); - -static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); -static uint32_t arc_bufc_to_flags(arc_buf_contents_t); -static inline void arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags); -static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags); - -static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *); -static void l2arc_read_done(zio_t *); - -static void -l2arc_trim(const arc_buf_hdr_t *hdr) -{ - l2arc_dev_t *dev = hdr->b_l2hdr.b_dev; - - ASSERT(HDR_HAS_L2HDR(hdr)); - ASSERT(MUTEX_HELD(&dev->l2ad_mtx)); - - if (HDR_GET_PSIZE(hdr) != 0) { - 
/*
 * Evaluates to non-zero when hdr's identity (both DVA words, b_birth and
 * b_spa) matches the given spa / dva / birth values.
 *
 * The whole expansion and every argument are parenthesized so the macro can
 * be negated, combined with other operators, or handed compound expressions
 * as arguments without changing its meaning.
 */
#define	HDR_EQUAL(spa, dva, birth, hdr)				\
	(((hdr)->b_dva.dva_word[0] == (dva)->dva_word[0]) &&	\
	((hdr)->b_dva.dva_word[1] == (dva)->dva_word[1]) &&	\
	((hdr)->b_birth == (birth)) && ((hdr)->b_spa == (spa)))
- */ -static arc_buf_hdr_t * -buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp) -{ - uint64_t idx = BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth); - kmutex_t *hash_lock = BUF_HASH_LOCK(idx); - arc_buf_hdr_t *fhdr; - uint32_t i; - - ASSERT(!DVA_IS_EMPTY(&hdr->b_dva)); - ASSERT(hdr->b_birth != 0); - ASSERT(!HDR_IN_HASH_TABLE(hdr)); - - if (lockp != NULL) { - *lockp = hash_lock; - mutex_enter(hash_lock); - } else { - ASSERT(MUTEX_HELD(hash_lock)); - } - - for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL; - fhdr = fhdr->b_hash_next, i++) { - if (HDR_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr)) - return (fhdr); - } - - hdr->b_hash_next = buf_hash_table.ht_table[idx]; - buf_hash_table.ht_table[idx] = hdr; - arc_hdr_set_flags(hdr, ARC_FLAG_IN_HASH_TABLE); - - /* collect some hash table performance data */ - if (i > 0) { - ARCSTAT_BUMP(arcstat_hash_collisions); - if (i == 1) - ARCSTAT_BUMP(arcstat_hash_chains); - - ARCSTAT_MAX(arcstat_hash_chain_max, i); - } - - ARCSTAT_BUMP(arcstat_hash_elements); - ARCSTAT_MAXSTAT(arcstat_hash_elements); - - return (NULL); -} - -static void -buf_hash_remove(arc_buf_hdr_t *hdr) -{ - arc_buf_hdr_t *fhdr, **hdrp; - uint64_t idx = BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth); - - ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx))); - ASSERT(HDR_IN_HASH_TABLE(hdr)); - - hdrp = &buf_hash_table.ht_table[idx]; - while ((fhdr = *hdrp) != hdr) { - ASSERT3P(fhdr, !=, NULL); - hdrp = &fhdr->b_hash_next; - } - *hdrp = hdr->b_hash_next; - hdr->b_hash_next = NULL; - arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE); - - /* collect some hash table performance data */ - ARCSTAT_BUMPDOWN(arcstat_hash_elements); - - if (buf_hash_table.ht_table[idx] && - buf_hash_table.ht_table[idx]->b_hash_next == NULL) - ARCSTAT_BUMPDOWN(arcstat_hash_chains); -} - -/* - * Global data structures and functions for the buf kmem cache. 
- */ -static kmem_cache_t *hdr_full_cache; -static kmem_cache_t *hdr_l2only_cache; -static kmem_cache_t *buf_cache; - -static void -buf_fini(void) -{ - int i; - - kmem_free(buf_hash_table.ht_table, - (buf_hash_table.ht_mask + 1) * sizeof (void *)); - for (i = 0; i < BUF_LOCKS; i++) - mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); - kmem_cache_destroy(hdr_full_cache); - kmem_cache_destroy(hdr_l2only_cache); - kmem_cache_destroy(buf_cache); -} - -/* - * Constructor callback - called when the cache is empty - * and a new buf is requested. - */ -/* ARGSUSED */ -static int -hdr_full_cons(void *vbuf, void *unused, int kmflag) -{ - arc_buf_hdr_t *hdr = vbuf; - - bzero(hdr, HDR_FULL_SIZE); - cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); - zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); - mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); - multilist_link_init(&hdr->b_l1hdr.b_arc_node); - arc_space_consume(HDR_FULL_SIZE, ARC_SPACE_HDRS); - - return (0); -} - -/* ARGSUSED */ -static int -hdr_l2only_cons(void *vbuf, void *unused, int kmflag) -{ - arc_buf_hdr_t *hdr = vbuf; - - bzero(hdr, HDR_L2ONLY_SIZE); - arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS); - - return (0); -} - -/* ARGSUSED */ -static int -buf_cons(void *vbuf, void *unused, int kmflag) -{ - arc_buf_t *buf = vbuf; - - bzero(buf, sizeof (arc_buf_t)); - mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL); - arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS); - - return (0); -} - -/* - * Destructor callback - called when a cached buf is - * no longer required. 
- */ -/* ARGSUSED */ -static void -hdr_full_dest(void *vbuf, void *unused) -{ - arc_buf_hdr_t *hdr = vbuf; - - ASSERT(HDR_EMPTY(hdr)); - cv_destroy(&hdr->b_l1hdr.b_cv); - zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt); - mutex_destroy(&hdr->b_l1hdr.b_freeze_lock); - ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); - arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); -} - -/* ARGSUSED */ -static void -hdr_l2only_dest(void *vbuf, void *unused) -{ - arc_buf_hdr_t *hdr = vbuf; - - ASSERT(HDR_EMPTY(hdr)); - arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS); -} - -/* ARGSUSED */ -static void -buf_dest(void *vbuf, void *unused) -{ - arc_buf_t *buf = vbuf; - - mutex_destroy(&buf->b_evict_lock); - arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS); -} - -/* - * Reclaim callback -- invoked when memory is low. - */ -/* ARGSUSED */ -static void -hdr_recl(void *unused) -{ - dprintf("hdr_recl called\n"); - /* - * umem calls the reclaim func when we destroy the buf cache, - * which is after we do arc_fini(). - */ - if (arc_initialized) - zthr_wakeup(arc_reap_zthr); -} - -static void -buf_init(void) -{ - uint64_t *ct; - uint64_t hsize = 1ULL << 12; - int i, j; - - /* - * The hash table is big enough to fill all of physical memory - * with an average block size of zfs_arc_average_blocksize (default 8K). - * By default, the table will take up - * totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers). 
- */ - while (hsize * zfs_arc_average_blocksize < (uint64_t)physmem * PAGESIZE) - hsize <<= 1; -retry: - buf_hash_table.ht_mask = hsize - 1; - buf_hash_table.ht_table = - kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP); - if (buf_hash_table.ht_table == NULL) { - ASSERT(hsize > (1ULL << 8)); - hsize >>= 1; - goto retry; - } - - hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, - 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0); - hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", - HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl, - NULL, NULL, 0); - buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t), - 0, buf_cons, buf_dest, NULL, NULL, NULL, 0); - - for (i = 0; i < 256; i++) - for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--) - *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY); - - for (i = 0; i < BUF_LOCKS; i++) { - mutex_init(&buf_hash_table.ht_locks[i].ht_lock, - NULL, MUTEX_DEFAULT, NULL); - } -} - -/* - * This is the size that the buf occupies in memory. If the buf is compressed, - * it will correspond to the compressed size. You should use this method of - * getting the buf size unless you explicitly need the logical size. - */ -int32_t -arc_buf_size(arc_buf_t *buf) -{ - return (ARC_BUF_COMPRESSED(buf) ? - HDR_GET_PSIZE(buf->b_hdr) : HDR_GET_LSIZE(buf->b_hdr)); -} - -int32_t -arc_buf_lsize(arc_buf_t *buf) -{ - return (HDR_GET_LSIZE(buf->b_hdr)); -} - -enum zio_compress -arc_get_compression(arc_buf_t *buf) -{ - return (ARC_BUF_COMPRESSED(buf) ? 
- HDR_GET_COMPRESS(buf->b_hdr) : ZIO_COMPRESS_OFF); -} - -#define ARC_MINTIME (hz>>4) /* 62 ms */ - -static inline boolean_t -arc_buf_is_shared(arc_buf_t *buf) -{ - boolean_t shared = (buf->b_data != NULL && - buf->b_hdr->b_l1hdr.b_pabd != NULL && - abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) && - buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd)); - IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr)); - IMPLY(shared, ARC_BUF_SHARED(buf)); - IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf)); - - /* - * It would be nice to assert arc_can_share() too, but the "hdr isn't - * already being shared" requirement prevents us from doing that. - */ - - return (shared); -} - -/* - * Free the checksum associated with this header. If there is no checksum, this - * is a no-op. - */ -static inline void -arc_cksum_free(arc_buf_hdr_t *hdr) -{ - ASSERT(HDR_HAS_L1HDR(hdr)); - mutex_enter(&hdr->b_l1hdr.b_freeze_lock); - if (hdr->b_l1hdr.b_freeze_cksum != NULL) { - kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t)); - hdr->b_l1hdr.b_freeze_cksum = NULL; - } - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); -} - -/* - * Return true iff at least one of the bufs on hdr is not compressed. - */ -static boolean_t -arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr) -{ - for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) { - if (!ARC_BUF_COMPRESSED(b)) { - return (B_TRUE); - } - } - return (B_FALSE); -} - -/* - * If we've turned on the ZFS_DEBUG_MODIFY flag, verify that the buf's data - * matches the checksum that is stored in the hdr. If there is no checksum, - * or if the buf is compressed, this is a no-op. 
- */ -static void -arc_cksum_verify(arc_buf_t *buf) -{ - arc_buf_hdr_t *hdr = buf->b_hdr; - zio_cksum_t zc; - - if (!(zfs_flags & ZFS_DEBUG_MODIFY)) - return; - - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || - arc_hdr_has_uncompressed_buf(hdr)); - return; - } - - ASSERT(HDR_HAS_L1HDR(hdr)); - - mutex_enter(&hdr->b_l1hdr.b_freeze_lock); - if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) { - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); - return; - } - - fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, &zc); - if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc)) - panic("buffer modified while frozen!"); - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); -} - -static boolean_t -arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) -{ - enum zio_compress compress = BP_GET_COMPRESS(zio->io_bp); - boolean_t valid_cksum; - - ASSERT(!BP_IS_EMBEDDED(zio->io_bp)); - VERIFY3U(BP_GET_PSIZE(zio->io_bp), ==, HDR_GET_PSIZE(hdr)); - - /* - * We rely on the blkptr's checksum to determine if the block - * is valid or not. When compressed arc is enabled, the l2arc - * writes the block to the l2arc just as it appears in the pool. - * This allows us to use the blkptr's checksum to validate the - * data that we just read off of the l2arc without having to store - * a separate checksum in the arc_buf_hdr_t. However, if compressed - * arc is disabled, then the data written to the l2arc is always - * uncompressed and won't match the block as it exists in the main - * pool. When this is the case, we must first compress it if it is - * compressed on the main pool before we can validate the checksum. 
- */ - if (!HDR_COMPRESSION_ENABLED(hdr) && compress != ZIO_COMPRESS_OFF) { - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); - uint64_t lsize = HDR_GET_LSIZE(hdr); - uint64_t csize; - - abd_t *cdata = abd_alloc_linear(HDR_GET_PSIZE(hdr), B_TRUE); - csize = zio_compress_data(compress, zio->io_abd, - abd_to_buf(cdata), lsize); - - ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr)); - if (csize < HDR_GET_PSIZE(hdr)) { - /* - * Compressed blocks are always a multiple of the - * smallest ashift in the pool. Ideally, we would - * like to round up the csize to the next - * spa_min_ashift but that value may have changed - * since the block was last written. Instead, - * we rely on the fact that the hdr's psize - * was set to the psize of the block when it was - * last written. We set the csize to that value - * and zero out any part that should not contain - * data. - */ - abd_zero_off(cdata, csize, HDR_GET_PSIZE(hdr) - csize); - csize = HDR_GET_PSIZE(hdr); - } - zio_push_transform(zio, cdata, csize, HDR_GET_PSIZE(hdr), NULL); - } - - /* - * Block pointers always store the checksum for the logical data. - * If the block pointer has the gang bit set, then the checksum - * it represents is for the reconstituted data and not for an - * individual gang member. The zio pipeline, however, must be able to - * determine the checksum of each of the gang constituents so it - * treats the checksum comparison differently than what we need - * for l2arc blocks. This prevents us from using the - * zio_checksum_error() interface directly. Instead we must call the - * zio_checksum_error_impl() so that we can ensure the checksum is - * generated using the correct checksum algorithm and accounts for the - * logical I/O size and not just a gang fragment. 
- */ - valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp, - BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size, - zio->io_offset, NULL) == 0); - zio_pop_transforms(zio); - return (valid_cksum); -} - -/* - * Given a buf full of data, if ZFS_DEBUG_MODIFY is enabled this computes a - * checksum and attaches it to the buf's hdr so that we can ensure that the buf - * isn't modified later on. If buf is compressed or there is already a checksum - * on the hdr, this is a no-op (we only checksum uncompressed bufs). - */ -static void -arc_cksum_compute(arc_buf_t *buf) -{ - arc_buf_hdr_t *hdr = buf->b_hdr; - - if (!(zfs_flags & ZFS_DEBUG_MODIFY)) - return; - - ASSERT(HDR_HAS_L1HDR(hdr)); - - mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); - if (hdr->b_l1hdr.b_freeze_cksum != NULL) { - ASSERT(arc_hdr_has_uncompressed_buf(hdr)); - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); - return; - } else if (ARC_BUF_COMPRESSED(buf)) { - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); - return; - } - - ASSERT(!ARC_BUF_COMPRESSED(buf)); - hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), - KM_SLEEP); - fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, - hdr->b_l1hdr.b_freeze_cksum); - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); -#ifdef illumos - arc_buf_watch(buf); -#endif -} - -#ifdef illumos -#ifndef _KERNEL -typedef struct procctl { - long cmd; - prwatch_t prwatch; -} procctl_t; -#endif - -/* ARGSUSED */ -static void -arc_buf_unwatch(arc_buf_t *buf) -{ -#ifndef _KERNEL - if (arc_watch) { - int result; - procctl_t ctl; - ctl.cmd = PCWATCH; - ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data; - ctl.prwatch.pr_size = 0; - ctl.prwatch.pr_wflags = 0; - result = write(arc_procfd, &ctl, sizeof (ctl)); - ASSERT3U(result, ==, sizeof (ctl)); - } -#endif -} - -/* ARGSUSED */ -static void -arc_buf_watch(arc_buf_t *buf) -{ -#ifndef _KERNEL - if (arc_watch) { - int result; - procctl_t ctl; - ctl.cmd = PCWATCH; - ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data; - ctl.prwatch.pr_size = 
arc_buf_size(buf); - ctl.prwatch.pr_wflags = WA_WRITE; - result = write(arc_procfd, &ctl, sizeof (ctl)); - ASSERT3U(result, ==, sizeof (ctl)); - } -#endif -} -#endif /* illumos */ - -static arc_buf_contents_t -arc_buf_type(arc_buf_hdr_t *hdr) -{ - arc_buf_contents_t type; - if (HDR_ISTYPE_METADATA(hdr)) { - type = ARC_BUFC_METADATA; - } else { - type = ARC_BUFC_DATA; - } - VERIFY3U(hdr->b_type, ==, type); - return (type); -} - -boolean_t -arc_is_metadata(arc_buf_t *buf) -{ - return (HDR_ISTYPE_METADATA(buf->b_hdr) != 0); -} - -static uint32_t -arc_bufc_to_flags(arc_buf_contents_t type) -{ - switch (type) { - case ARC_BUFC_DATA: - /* metadata field is 0 if buffer contains normal data */ - return (0); - case ARC_BUFC_METADATA: - return (ARC_FLAG_BUFC_METADATA); - default: - break; - } - panic("undefined ARC buffer type!"); - return ((uint32_t)-1); -} - -void -arc_buf_thaw(arc_buf_t *buf) -{ - arc_buf_hdr_t *hdr = buf->b_hdr; - - ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); - ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - - arc_cksum_verify(buf); - - /* - * Compressed buffers do not manipulate the b_freeze_cksum or - * allocate b_thawed. 
- */ - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || - arc_hdr_has_uncompressed_buf(hdr)); - return; - } - - ASSERT(HDR_HAS_L1HDR(hdr)); - arc_cksum_free(hdr); - - mutex_enter(&hdr->b_l1hdr.b_freeze_lock); -#ifdef ZFS_DEBUG - if (zfs_flags & ZFS_DEBUG_MODIFY) { - if (hdr->b_l1hdr.b_thawed != NULL) - kmem_free(hdr->b_l1hdr.b_thawed, 1); - hdr->b_l1hdr.b_thawed = kmem_alloc(1, KM_SLEEP); - } -#endif - - mutex_exit(&hdr->b_l1hdr.b_freeze_lock); - -#ifdef illumos - arc_buf_unwatch(buf); -#endif -} - -void -arc_buf_freeze(arc_buf_t *buf) -{ - arc_buf_hdr_t *hdr = buf->b_hdr; - kmutex_t *hash_lock; - - if (!(zfs_flags & ZFS_DEBUG_MODIFY)) - return; - - if (ARC_BUF_COMPRESSED(buf)) { - ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || - arc_hdr_has_uncompressed_buf(hdr)); - return; - } - - hash_lock = HDR_LOCK(hdr); - mutex_enter(hash_lock); - - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL || - hdr->b_l1hdr.b_state == arc_anon); - arc_cksum_compute(buf); - mutex_exit(hash_lock); -} - -/* - * The arc_buf_hdr_t's b_flags should never be modified directly. Instead, - * the following functions should be used to ensure that the flags are - * updated in a thread-safe way. When manipulating the flags either - * the hash_lock must be held or the hdr must be undiscoverable. This - * ensures that we're not racing with any other threads when updating - * the flags. - */ -static inline void -arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags) -{ - ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); - hdr->b_flags |= flags; -} - -static inline void -arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags) -{ - ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); - hdr->b_flags &= ~flags; -} - -/* - * Setting the compression bits in the arc_buf_hdr_t's b_flags is - * done in a special way since we have to clear and set bits - * at the same time. 
Consumers that wish to set the compression bits - * must use this function to ensure that the flags are updated in - * thread-safe manner. - */ -static void -arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp) -{ - ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); - - /* - * Holes and embedded blocks will always have a psize = 0 so - * we ignore the compression of the blkptr and set the - * arc_buf_hdr_t's compression to ZIO_COMPRESS_OFF. - * Holes and embedded blocks remain anonymous so we don't - * want to uncompress them. Mark them as uncompressed. - */ - if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) { - arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF); - ASSERT(!HDR_COMPRESSION_ENABLED(hdr)); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); - } else { - arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, cmp); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); - ASSERT(HDR_COMPRESSION_ENABLED(hdr)); - } -} - -/* - * Looks for another buf on the same hdr which has the data decompressed, copies - * from it, and returns true. If no such buf exists, returns false. - */ -static boolean_t -arc_buf_try_copy_decompressed_data(arc_buf_t *buf) -{ - arc_buf_hdr_t *hdr = buf->b_hdr; - boolean_t copied = B_FALSE; - - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT3P(buf->b_data, !=, NULL); - ASSERT(!ARC_BUF_COMPRESSED(buf)); - - for (arc_buf_t *from = hdr->b_l1hdr.b_buf; from != NULL; - from = from->b_next) { - /* can't use our own data buffer */ - if (from == buf) { - continue; - } - - if (!ARC_BUF_COMPRESSED(from)) { - bcopy(from->b_data, buf->b_data, arc_buf_size(buf)); - copied = B_TRUE; - break; - } - } - - /* - * There were no decompressed bufs, so there should not be a - * checksum on the hdr either. 
- */ - EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); - - return (copied); -} - -/* - * Given a buf that has a data buffer attached to it, this function will - * efficiently fill the buf with data of the specified compression setting from - * the hdr and update the hdr's b_freeze_cksum if necessary. If the buf and hdr - * are already sharing a data buf, no copy is performed. - * - * If the buf is marked as compressed but uncompressed data was requested, this - * will allocate a new data buffer for the buf, remove that flag, and fill the - * buf with uncompressed data. You can't request a compressed buf on a hdr with - * uncompressed data, and (since we haven't added support for it yet) if you - * want compressed data your buf must already be marked as compressed and have - * the correct-sized data buffer. - */ -static int -arc_buf_fill(arc_buf_t *buf, boolean_t compressed) -{ - arc_buf_hdr_t *hdr = buf->b_hdr; - boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); - dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap; - - ASSERT3P(buf->b_data, !=, NULL); - IMPLY(compressed, hdr_compressed); - IMPLY(compressed, ARC_BUF_COMPRESSED(buf)); - - if (hdr_compressed == compressed) { - if (!arc_buf_is_shared(buf)) { - abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd, - arc_buf_size(buf)); - } - } else { - ASSERT(hdr_compressed); - ASSERT(!compressed); - ASSERT3U(HDR_GET_LSIZE(hdr), !=, HDR_GET_PSIZE(hdr)); - - /* - * If the buf is sharing its data with the hdr, unlink it and - * allocate a new data buffer for the buf. 
- */ - if (arc_buf_is_shared(buf)) { - ASSERT(ARC_BUF_COMPRESSED(buf)); - - /* We need to give the buf it's own b_data */ - buf->b_flags &= ~ARC_BUF_FLAG_SHARED; - buf->b_data = - arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf); - arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA); - - /* Previously overhead was 0; just add new overhead */ - ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr)); - } else if (ARC_BUF_COMPRESSED(buf)) { - /* We need to reallocate the buf's b_data */ - arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr), - buf); - buf->b_data = - arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf); - - /* We increased the size of b_data; update overhead */ - ARCSTAT_INCR(arcstat_overhead_size, - HDR_GET_LSIZE(hdr) - HDR_GET_PSIZE(hdr)); - } - - /* - * Regardless of the buf's previous compression settings, it - * should not be compressed at the end of this function. - */ - buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; - - /* - * Try copying the data from another buf which already has a - * decompressed version. If that's not possible, it's time to - * bite the bullet and decompress the data from the hdr. - */ - if (arc_buf_try_copy_decompressed_data(buf)) { - /* Skip byteswapping and checksumming (already done) */ - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL); - return (0); - } else { - int error = zio_decompress_data(HDR_GET_COMPRESS(hdr), - hdr->b_l1hdr.b_pabd, buf->b_data, - HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); - - /* - * Absent hardware errors or software bugs, this should - * be impossible, but log it anyway so we can debug it. 
- */ - if (error != 0) { - zfs_dbgmsg( - "hdr %p, compress %d, psize %d, lsize %d", - hdr, HDR_GET_COMPRESS(hdr), - HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); - return (SET_ERROR(EIO)); - } - } - } - - /* Byteswap the buf's data if necessary */ - if (bswap != DMU_BSWAP_NUMFUNCS) { - ASSERT(!HDR_SHARED_DATA(hdr)); - ASSERT3U(bswap, <, DMU_BSWAP_NUMFUNCS); - dmu_ot_byteswap[bswap].ob_func(buf->b_data, HDR_GET_LSIZE(hdr)); - } - - /* Compute the hdr's checksum if necessary */ - arc_cksum_compute(buf); - - return (0); -} - -int -arc_decompress(arc_buf_t *buf) -{ - return (arc_buf_fill(buf, B_FALSE)); -} - -/* - * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. - */ -static uint64_t -arc_hdr_size(arc_buf_hdr_t *hdr) -{ - uint64_t size; - - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && - HDR_GET_PSIZE(hdr) > 0) { - size = HDR_GET_PSIZE(hdr); - } else { - ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); - size = HDR_GET_LSIZE(hdr); - } - return (size); -} - -/* - * Increment the amount of evictable space in the arc_state_t's refcount. - * We account for the space used by the hdr and the arc buf individually - * so that we can add and remove them from the refcount individually. 
- */ -static void -arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) -{ - arc_buf_contents_t type = arc_buf_type(hdr); - - ASSERT(HDR_HAS_L1HDR(hdr)); - - if (GHOST_STATE(state)) { - ASSERT0(hdr->b_l1hdr.b_bufcnt); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - (void) zfs_refcount_add_many(&state->arcs_esize[type], - HDR_GET_LSIZE(hdr), hdr); - return; - } - - ASSERT(!GHOST_STATE(state)); - if (hdr->b_l1hdr.b_pabd != NULL) { - (void) zfs_refcount_add_many(&state->arcs_esize[type], - arc_hdr_size(hdr), hdr); - } - for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; - buf = buf->b_next) { - if (arc_buf_is_shared(buf)) - continue; - (void) zfs_refcount_add_many(&state->arcs_esize[type], - arc_buf_size(buf), buf); - } -} - -/* - * Decrement the amount of evictable space in the arc_state_t's refcount. - * We account for the space used by the hdr and the arc buf individually - * so that we can add and remove them from the refcount individually. - */ -static void -arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) -{ - arc_buf_contents_t type = arc_buf_type(hdr); - - ASSERT(HDR_HAS_L1HDR(hdr)); - - if (GHOST_STATE(state)) { - ASSERT0(hdr->b_l1hdr.b_bufcnt); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - (void) zfs_refcount_remove_many(&state->arcs_esize[type], - HDR_GET_LSIZE(hdr), hdr); - return; - } - - ASSERT(!GHOST_STATE(state)); - if (hdr->b_l1hdr.b_pabd != NULL) { - (void) zfs_refcount_remove_many(&state->arcs_esize[type], - arc_hdr_size(hdr), hdr); - } - for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; - buf = buf->b_next) { - if (arc_buf_is_shared(buf)) - continue; - (void) zfs_refcount_remove_many(&state->arcs_esize[type], - arc_buf_size(buf), buf); - } -} - -/* - * Add a reference to this hdr indicating that someone is actively - * referencing that memory. 
When the refcount transitions from 0 to 1, - * we remove it from the respective arc_state_t list to indicate that - * it is not evictable. - */ -static void -add_reference(arc_buf_hdr_t *hdr, void *tag) -{ - ASSERT(HDR_HAS_L1HDR(hdr)); - if (!MUTEX_HELD(HDR_LOCK(hdr))) { - ASSERT(hdr->b_l1hdr.b_state == arc_anon); - ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - } - - arc_state_t *state = hdr->b_l1hdr.b_state; - - if ((zfs_refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) && - (state != arc_anon)) { - /* We don't use the L2-only state list. */ - if (state != arc_l2c_only) { - multilist_remove(state->arcs_list[arc_buf_type(hdr)], - hdr); - arc_evictable_space_decrement(hdr, state); - } - /* remove the prefetch flag if we get a reference */ - arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH); - } -} - -/* - * Remove a reference from this hdr. When the reference transitions from - * 1 to 0 and we're not anonymous, then we add this hdr to the arc_state_t's - * list making it eligible for eviction. - */ -static int -remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) -{ - int cnt; - arc_state_t *state = hdr->b_l1hdr.b_state; - - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(state == arc_anon || MUTEX_HELD(hash_lock)); - ASSERT(!GHOST_STATE(state)); - - /* - * arc_l2c_only counts as a ghost state so we don't need to explicitly - * check to prevent usage of the arc_l2c_only list. - */ - if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) && - (state != arc_anon)) { - multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr); - ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); - arc_evictable_space_increment(hdr, state); - } - return (cnt); -} - -/* - * Returns detailed information about a specific arc buffer. When the - * state_index argument is set the function will calculate the arc header - * list position for its arc state. Since this requires a linear traversal - * callers are strongly encourage not to do this. 
However, it can be helpful - * for targeted analysis so the functionality is provided. - */ -void -arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index) -{ - arc_buf_hdr_t *hdr = ab->b_hdr; - l1arc_buf_hdr_t *l1hdr = NULL; - l2arc_buf_hdr_t *l2hdr = NULL; - arc_state_t *state = NULL; - - memset(abi, 0, sizeof (arc_buf_info_t)); - - if (hdr == NULL) - return; - - abi->abi_flags = hdr->b_flags; - - if (HDR_HAS_L1HDR(hdr)) { - l1hdr = &hdr->b_l1hdr; - state = l1hdr->b_state; - } - if (HDR_HAS_L2HDR(hdr)) - l2hdr = &hdr->b_l2hdr; - - if (l1hdr) { - abi->abi_bufcnt = l1hdr->b_bufcnt; - abi->abi_access = l1hdr->b_arc_access; - abi->abi_mru_hits = l1hdr->b_mru_hits; - abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits; - abi->abi_mfu_hits = l1hdr->b_mfu_hits; - abi->abi_mfu_ghost_hits = l1hdr->b_mfu_ghost_hits; - abi->abi_holds = zfs_refcount_count(&l1hdr->b_refcnt); - } - - if (l2hdr) { - abi->abi_l2arc_dattr = l2hdr->b_daddr; - abi->abi_l2arc_hits = l2hdr->b_hits; - } - - abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON; - abi->abi_state_contents = arc_buf_type(hdr); - abi->abi_size = arc_hdr_size(hdr); -} - -/* - * Move the supplied buffer to the indicated state. The hash lock - * for the buffer must be held by the caller. - */ -static void -arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, - kmutex_t *hash_lock) -{ - arc_state_t *old_state; - int64_t refcnt; - uint32_t bufcnt; - boolean_t update_old, update_new; - arc_buf_contents_t buftype = arc_buf_type(hdr); - - /* - * We almost always have an L1 hdr here, since we call arc_hdr_realloc() - * in arc_read() when bringing a buffer out of the L2ARC. However, the - * L1 hdr doesn't always exist when we change state to arc_anon before - * destroying a header, in which case reallocating to add the L1 hdr is - * pointless. 
- */ - if (HDR_HAS_L1HDR(hdr)) { - old_state = hdr->b_l1hdr.b_state; - refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt); - bufcnt = hdr->b_l1hdr.b_bufcnt; - update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL); - } else { - old_state = arc_l2c_only; - refcnt = 0; - bufcnt = 0; - update_old = B_FALSE; - } - update_new = update_old; - - ASSERT(MUTEX_HELD(hash_lock)); - ASSERT3P(new_state, !=, old_state); - ASSERT(!GHOST_STATE(new_state) || bufcnt == 0); - ASSERT(old_state != arc_anon || bufcnt <= 1); - - /* - * If this buffer is evictable, transfer it from the - * old state list to the new state list. - */ - if (refcnt == 0) { - if (old_state != arc_anon && old_state != arc_l2c_only) { - ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_remove(old_state->arcs_list[buftype], hdr); - - if (GHOST_STATE(old_state)) { - ASSERT0(bufcnt); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - update_old = B_TRUE; - } - arc_evictable_space_decrement(hdr, old_state); - } - if (new_state != arc_anon && new_state != arc_l2c_only) { - - /* - * An L1 header always exists here, since if we're - * moving to some L1-cached state (i.e. not l2c_only or - * anonymous), we realloc the header to add an L1hdr - * beforehand. - */ - ASSERT(HDR_HAS_L1HDR(hdr)); - multilist_insert(new_state->arcs_list[buftype], hdr); - - if (GHOST_STATE(new_state)) { - ASSERT0(bufcnt); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - update_new = B_TRU |