diff options
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c | 496 |
1 files changed, 306 insertions, 190 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c index 5e563b632909..48d87f97f669 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c @@ -19,26 +19,28 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/dmu.h> +#include <sys/dmu_objset.h> #include <sys/dmu_tx.h> #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h> #include <sys/dsl_prop.h> #include <sys/dsl_synctask.h> +#include <sys/dsl_deleg.h> #include <sys/spa.h> #include <sys/zap.h> #include <sys/zio.h> #include <sys/arc.h> +#include <sys/sunddi.h> #include "zfs_namecheck.h" -static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); +static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); +static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, + cred_t *cr, dmu_tx_t *tx); /* ARGSUSED */ @@ -55,8 +57,6 @@ dsl_dir_evict(dmu_buf_t *db, void *arg) ASSERT(dd->dd_space_towrite[t] == 0); } - ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); - if (dd->dd_parent) dsl_dir_close(dd->dd_parent, dd); @@ -91,9 +91,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dmu_object_info_t doi; dmu_object_info_from_db(dbuf, &doi); ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); + ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); } #endif - /* XXX assert bonus buffer size is correct */ if (dd == NULL) { dsl_dir_t *winner; int err; @@ -103,7 +103,6 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_dbuf = dbuf; dd->dd_pool = dp; dd->dd_phys = dbuf->db_data; - dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), @@ -112,36 +111,25 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, if (dd->dd_phys->dd_parent_obj) { err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, NULL, dd, &dd->dd_parent); - if (err) { - mutex_destroy(&dd->dd_lock); - kmem_free(dd, sizeof (dsl_dir_t)); - dmu_buf_rele(dbuf, tag); - return (err); - } + if (err) + goto errout; if (tail) { #ifdef ZFS_DEBUG uint64_t foundobj; err = zap_lookup(dp->dp_meta_objset, - dd->dd_parent->dd_phys-> - dd_child_dir_zapobj, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, tail, sizeof (foundobj), 1, &foundobj); ASSERT(err || foundobj == ddobj); #endif (void) strcpy(dd->dd_myname, tail); } else { err = zap_value_search(dp->dp_meta_objset, - dd->dd_parent->dd_phys-> - dd_child_dir_zapobj, - ddobj, dd->dd_myname); - } - if (err) { - dsl_dir_close(dd->dd_parent, dd); - mutex_destroy(&dd->dd_lock); - kmem_free(dd, sizeof (dsl_dir_t)); - dmu_buf_rele(dbuf, tag); - return (err); + dd->dd_parent->dd_phys->dd_child_dir_zapobj, + ddobj, 0, dd->dd_myname); } + if (err) + goto errout; } else { (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); } @@ -174,6 +162,15 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, ASSERT3P(dd->dd_dbuf, ==, dbuf); *ddp = dd; return (0); + +errout: + if (dd->dd_parent) + dsl_dir_close(dd->dd_parent, dd); + mutex_destroy(&dd->dd_lock); + kmem_free(dd, sizeof (dsl_dir_t)); + dmu_buf_rele(dbuf, tag); + return (err); + } void @@ -404,27 +401,37 @@ dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) } uint64_t -dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) +dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, + dmu_tx_t *tx) { - objset_t *mos = pds->dd_pool->dp_meta_objset; + objset_t *mos = dp->dp_meta_objset; uint64_t ddobj; dsl_dir_phys_t *dsphys; dmu_buf_t *dbuf; ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); - VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, - name, sizeof (uint64_t), 1, &ddobj, tx)); + if (pds) { + VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, + name, sizeof (uint64_t), 1, &ddobj, tx)); + } else { + /* it's the root dir */ + VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); + } VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; dsphys->dd_creation_time = gethrestime_sec(); - dsphys->dd_parent_obj = pds->dd_object; + if (pds) + dsphys->dd_parent_obj = pds->dd_object; dsphys->dd_props_zapobj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); dsphys->dd_child_dir_zapobj = zap_create(mos, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); + if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) + dsphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dmu_buf_rele(dbuf, FTAG); return (ddobj); @@ -461,23 +468,27 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) } void -dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) +dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t val, obj; + dd_used_t t; ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); /* Remove our reservation. */ val = 0; - dsl_dir_set_reservation_sync(dd, &val, tx); - ASSERT3U(dd->dd_used_bytes, ==, 0); + dsl_dir_set_reservation_sync(dd, &val, cr, tx); + ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0); ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); + for (t = 0; t < DD_USED_NUM; t++) + ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0); VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); + VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); VERIFY(0 == zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); @@ -486,65 +497,53 @@ dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) VERIFY(0 == dmu_object_free(mos, obj, tx)); } -void -dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) +boolean_t +dsl_dir_is_clone(dsl_dir_t *dd) { - dsl_dir_phys_t *dsp; - dmu_buf_t *dbuf; - int error; - - *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, - DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); - - error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, - sizeof (uint64_t), 1, ddobjp, tx); - ASSERT3U(error, ==, 0); - - VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf)); - dmu_buf_will_dirty(dbuf, tx); - dsp = dbuf->db_data; - - dsp->dd_creation_time = gethrestime_sec(); - dsp->dd_props_zapobj = zap_create(mos, - DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); - dsp->dd_child_dir_zapobj = zap_create(mos, - DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); - - dmu_buf_rele(dbuf, FTAG); + return (dd->dd_phys->dd_origin_obj && + (dd->dd_pool->dp_origin_snap == NULL || + dd->dd_phys->dd_origin_obj != + dd->dd_pool->dp_origin_snap->ds_object)); } void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, - dsl_dir_space_available(dd, NULL, 0, TRUE)); - mutex_enter(&dd->dd_lock); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, - dd->dd_phys->dd_quota); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + dd->dd_phys->dd_used_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, dd->dd_phys->dd_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, dd->dd_phys->dd_compressed_bytes == 0 ? 100 : (dd->dd_phys->dd_uncompressed_bytes * 100 / dd->dd_phys->dd_compressed_bytes)); + if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, + dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, + dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, + dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, + dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + + dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); + } mutex_exit(&dd->dd_lock); - if (dd->dd_phys->dd_clone_parent_obj) { + rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; char buf[MAXNAMELEN]; - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); - VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, - dd->dd_phys->dd_clone_parent_obj, - NULL, DS_MODE_NONE, FTAG, &ds)); + VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); - dsl_dataset_close(ds, DS_MODE_NONE, FTAG); - rw_exit(&dd->dd_pool->dp_config_rwlock); - + dsl_dataset_rele(ds, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } + rw_exit(&dd->dd_pool->dp_config_rwlock); } void @@ -580,7 +579,6 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; - dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; mutex_exit(&dd->dd_lock); /* release the hold from dsl_dir_dirty */ @@ -588,15 +586,13 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) } static uint64_t -dsl_dir_estimated_space(dsl_dir_t *dd) +dsl_dir_space_towrite(dsl_dir_t *dd) { - int64_t space; + uint64_t space = 0; int i; ASSERT(MUTEX_HELD(&dd->dd_lock)); - space = dd->dd_phys->dd_used_bytes; - ASSERT(space >= 0); for (i = 0; i < TXG_SIZE; i++) { space += dd->dd_space_towrite[i&TXG_MASK]; ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); @@ -630,13 +626,9 @@ dsl_dir_space_available(dsl_dir_t *dd, mutex_enter(&dd->dd_lock); if (dd->dd_phys->dd_quota != 0) quota = dd->dd_phys->dd_quota; - if (ondiskonly) { - used = dd->dd_used_bytes; - } else { - used = dsl_dir_estimated_space(dd); - } - if (dd == ancestor) - used += delta; + used = dd->dd_phys->dd_used_bytes; + if (!ondiskonly) + used += dsl_dir_space_towrite(dd); if (dd->dd_parent == NULL) { uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); @@ -651,6 +643,14 @@ dsl_dir_space_available(dsl_dir_t *dd, parentspace += dd->dd_phys->dd_reserved - used; } + if (dd == ancestor) { + ASSERT(delta <= 0); + ASSERT(used >= -delta); + used += delta; + if (parentspace != UINT64_MAX) + parentspace -= delta; + } + if (used > quota) { /* over quota */ myspace = 0; @@ -678,50 +678,68 @@ dsl_dir_space_available(dsl_dir_t *dd, struct tempreserve { list_node_t tr_node; + dsl_pool_t *tr_dp; dsl_dir_t *tr_ds; uint64_t tr_size; }; -/* - * Reserve space in this dsl_dir, to be used in this tx's txg. - * After the space has been dirtied (and thus - * dsl_dir_willuse_space() has been called), the reservation should - * be canceled, using dsl_dir_tempreserve_clear(). - */ static int -dsl_dir_tempreserve_impl(dsl_dir_t *dd, - uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx) +dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, + boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list, + dmu_tx_t *tx, boolean_t first) { uint64_t txg = tx->tx_txg; - uint64_t est_used, quota, parent_rsrv; - int edquot = EDQUOT; + uint64_t est_inflight, used_on_disk, quota, parent_rsrv; + struct tempreserve *tr; + int enospc = EDQUOT; int txgidx = txg & TXG_MASK; int i; - struct tempreserve *tr; + uint64_t ref_rsrv = 0; ASSERT3U(txg, !=, 0); - ASSERT3S(asize, >=, 0); + ASSERT3S(asize, >, 0); mutex_enter(&dd->dd_lock); + /* * Check against the dsl_dir's quota. We don't add in the delta * when checking for over-quota because they get one free hit. */ - est_used = dsl_dir_estimated_space(dd); + est_inflight = dsl_dir_space_towrite(dd); for (i = 0; i < TXG_SIZE; i++) - est_used += dd->dd_tempreserved[i]; + est_inflight += dd->dd_tempreserved[i]; + used_on_disk = dd->dd_phys->dd_used_bytes; - quota = UINT64_MAX; + /* + * On the first iteration, fetch the dataset's used-on-disk and + * refreservation values. Also, if checkrefquota is set, test if + * allocating this space would exceed the dataset's refquota. + */ + if (first && tx->tx_objset) { + int error; + dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset; + + error = dsl_dataset_check_quota(ds, checkrefquota, + asize, est_inflight, &used_on_disk, &ref_rsrv); + if (error) { + mutex_exit(&dd->dd_lock); + return (error); + } + } - if (dd->dd_phys->dd_quota) + /* + * If this transaction will result in a net free of space, + * we want to let it through. + */ + if (ignorequota || netfree || dd->dd_phys->dd_quota == 0) + quota = UINT64_MAX; + else quota = dd->dd_phys->dd_quota; /* - * If this transaction will result in a net free of space, we want - * to let it through, but we have to be careful: the space that it - * frees won't become available until *after* this txg syncs. - * Therefore, to ensure that it's possible to remove files from - * a full pool without inducing transient overcommits, we throttle + * Adjust the quota against the actual pool size at the root. + * To ensure that it's possible to remove files from a full + * pool without inducing transient overcommits, we throttle * netfree transactions against a quota that is slightly larger, * but still within the pool's allocation slop. In cases where * we're very close to full, this will allow a steady trickle of @@ -731,47 +749,45 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); if (poolsize < quota) { quota = poolsize; - edquot = ENOSPC; + enospc = ENOSPC; } - } else if (netfree) { - quota = UINT64_MAX; } /* * If they are requesting more space, and our current estimate - * is over quota. They get to try again unless the actual + * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes (which * may free up space for us). */ - if (asize > 0 && est_used > quota) { - if (dd->dd_space_towrite[txg & TXG_MASK] != 0 || - dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 || - dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 || - dd->dd_used_bytes < quota) - edquot = ERESTART; - dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " + if (used_on_disk + est_inflight > quota) { + if (est_inflight > 0 || used_on_disk < quota) + enospc = ERESTART; + dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " "quota=%lluK tr=%lluK err=%d\n", - dd->dd_used_bytes>>10, est_used>>10, - quota>>10, asize>>10, edquot); + used_on_disk>>10, est_inflight>>10, + quota>>10, asize>>10, enospc); mutex_exit(&dd->dd_lock); - return (edquot); + return (enospc); } /* We need to up our estimated delta before dropping dd_lock */ dd->dd_tempreserved[txgidx] += asize; - parent_rsrv = parent_delta(dd, est_used, asize); + parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, + asize - ref_rsrv); mutex_exit(&dd->dd_lock); - tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_ds = dd; tr->tr_size = asize; list_insert_tail(tr_list, tr); /* see if it's OK with our parent */ if (dd->dd_parent && parent_rsrv) { + boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); + return (dsl_dir_tempreserve_impl(dd->dd_parent, - parent_rsrv, netfree, tr_list, tx)); + parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE)); } else { return (0); } @@ -779,42 +795,62 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, /* * Reserve space in this dsl_dir, to be used in this tx's txg. - * After the space has been dirtied (and thus - * dsl_dir_willuse_space() has been called), the reservation should - * be canceled, using dsl_dir_tempreserve_clear(). + * After the space has been dirtied (and dsl_dir_willuse_space() + * has been called), the reservation should be canceled, using + * dsl_dir_tempreserve_clear(). */ int -dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, - uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) +dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, + uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx) { - int err = 0; + int err; list_t *tr_list; + if (asize == 0) { + *tr_cookiep = NULL; + return (0); + } + tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); list_create(tr_list, sizeof (struct tempreserve), offsetof(struct tempreserve, tr_node)); - ASSERT3S(asize, >=, 0); + ASSERT3S(asize, >, 0); ASSERT3S(fsize, >=, 0); - err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, - tr_list, tx); - + err = arc_tempreserve_space(lsize, tx->tx_txg); if (err == 0) { struct tempreserve *tr; - err = arc_tempreserve_space(lsize); - if (err == 0) { - tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); - tr->tr_ds = NULL; - tr->tr_size = lsize; - list_insert_tail(tr_list, tr); + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); + tr->tr_size = lsize; + list_insert_tail(tr_list, tr); + + err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx); + } else { + if (err == EAGAIN) { + txg_delay(dd->dd_pool, tx->tx_txg, 1); + err = ERESTART; } + dsl_pool_memory_pressure(dd->dd_pool); + } + + if (err == 0) { + struct tempreserve *tr; + + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); + tr->tr_dp = dd->dd_pool; + tr->tr_size = asize; + list_insert_tail(tr_list, tr); + + err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, + FALSE, asize > usize, tr_list, tx, TRUE); } if (err) dsl_dir_tempreserve_clear(tr_list, tx); else *tr_cookiep = tr_list; + return (err); } @@ -831,15 +867,20 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) ASSERT3U(tx->tx_txg, !=, 0); + if (tr_cookie == NULL) + return; + while (tr = list_head(tr_list)) { - if (tr->tr_ds == NULL) { - arc_tempreserve_clear(tr->tr_size); - } else { + if (tr->tr_dp) { + dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx); + } else if (tr->tr_ds) { mutex_enter(&tr->tr_ds->dd_lock); ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, tr->tr_size); tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; mutex_exit(&tr->tr_ds->dd_lock); + } else { + arc_tempreserve_clear(tr->tr_size); } list_remove(tr_list, tr); kmem_free(tr, sizeof (struct tempreserve)); @@ -848,13 +889,8 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) kmem_free(tr_list, sizeof (list_t)); } -/* - * Call in open context when we think we're going to write/free space, - * eg. when dirtying data. Be conservative (ie. OK to write less than - * this or free more than this, but don't write more or free less). - */ -void -dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) +static void +dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) { int64_t parent_space; uint64_t est_used; @@ -863,7 +899,7 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) if (space > 0) dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; - est_used = dsl_dir_estimated_space(dd); + est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; parent_space = parent_delta(dd, est_used, space); mutex_exit(&dd->dd_lock); @@ -872,39 +908,96 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) /* XXX this is potentially expensive and unnecessary... */ if (parent_space && dd->dd_parent) - dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); + dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx); +} + +/* + * Call in open context when we think we're going to write/free space, + * eg. when dirtying data. Be conservative (ie. OK to write less than + * this or free more than this, but don't write more or free less). + */ +void +dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) +{ + dsl_pool_willuse_space(dd->dd_pool, space, tx); + dsl_dir_willuse_space_impl(dd, space, tx); } /* call from syncing context when we actually write/free space for this dd */ void -dsl_dir_diduse_space(dsl_dir_t *dd, +dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) { int64_t accounted_delta; + boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(type < DD_USED_NUM); dsl_dir_dirty(dd, tx); - mutex_enter(&dd->dd_lock); - accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); - ASSERT(used >= 0 || dd->dd_used_bytes >= -used); + if (needlock) + mutex_enter(&dd->dd_lock); + accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); + ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); ASSERT(compressed >= 0 || dd->dd_phys->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); - dd->dd_used_bytes += used; + dd->dd_phys->dd_used_bytes += used; dd->dd_phys->dd_uncompressed_bytes += uncompressed; dd->dd_phys->dd_compressed_bytes += compressed; - mutex_exit(&dd->dd_lock); + + if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + ASSERT(used > 0 || + dd->dd_phys->dd_used_breakdown[type] >= -used); + dd->dd_phys->dd_used_breakdown[type] += used; +#ifdef DEBUG + dd_used_t t; + uint64_t u = 0; + for (t = 0; t < DD_USED_NUM; t++) + u += dd->dd_phys->dd_used_breakdown[t]; + ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); +#endif + } + if (needlock) + mutex_exit(&dd->dd_lock); if (dd->dd_parent != NULL) { - dsl_dir_diduse_space(dd->dd_parent, + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, accounted_delta, compressed, uncompressed, tx); + dsl_dir_transfer_space(dd->dd_parent, + used - accounted_delta, + DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); } } -/* ARGSUSED */ +void +dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, + dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) +{ + boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(oldtype < DD_USED_NUM); + ASSERT(newtype < DD_USED_NUM); + + if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) + return; + + dsl_dir_dirty(dd, tx); + if (needlock) + mutex_enter(&dd->dd_lock); + ASSERT(delta > 0 ? + dd->dd_phys->dd_used_breakdown[oldtype] >= delta : + dd->dd_phys->dd_used_breakdown[newtype] >= -delta); + ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); + dd->dd_phys->dd_used_breakdown[oldtype] -= delta; + dd->dd_phys->dd_used_breakdown[newtype] += delta; + if (needlock) + mutex_exit(&dd->dd_lock); +} + static int dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) { @@ -921,22 +1014,22 @@ dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) /* * If we are doing the preliminary check in open context, and * there are pending changes, then don't fail it, since the - * pending changes could under-estimat the amount of space to be + * pending changes could under-estimate the amount of space to be * freed up. */ - towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] + - dd->dd_space_towrite[2] + dd->dd_space_towrite[3]; + towrite = dsl_dir_space_towrite(dd); if ((dmu_tx_is_syncing(tx) || towrite == 0) && (new_quota < dd->dd_phys->dd_reserved || - new_quota < dsl_dir_estimated_space(dd))) { + new_quota < dd->dd_phys->dd_used_bytes + towrite)) { err = ENOSPC; } mutex_exit(&dd->dd_lock); return (err); } +/* ARGSUSED */ static void -dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; uint64_t *quotap = arg2; @@ -947,6 +1040,10 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_enter(&dd->dd_lock); dd->dd_phys->dd_quota = new_quota; mutex_exit(&dd->dd_lock); + + spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa, + tx, cr, "%lld dataset = %llu ", + (longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj); } int @@ -958,20 +1055,22 @@ dsl_dir_set_quota(const char *ddname, uint64_t quota) err = dsl_dir_open(ddname, FTAG, &dd, NULL); if (err) return (err); - /* - * If someone removes a file, then tries to set the quota, we - * want to make sure the file freeing takes effect. - */ - txg_wait_open(dd->dd_pool, 0); - err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, - dsl_dir_set_quota_sync, dd, "a, 0); + if (quota != dd->dd_phys->dd_quota) { + /* + * If someone removes a file, then tries to set the quota, we + * want to make sure the file freeing takes effect. + */ + txg_wait_open(dd->dd_pool, 0); + + err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, + dsl_dir_set_quota_sync, dd, "a, 0); + } dsl_dir_close(dd, FTAG); return (err); } -/* ARGSUSED */ -static int +int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; @@ -991,7 +1090,7 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) return (0); mutex_enter(&dd->dd_lock); - used = dd->dd_used_bytes; + used = dd->dd_phys->dd_used_bytes; delta = MAX(used, new_reservation) - MAX(used, dd->dd_phys->dd_reserved); mutex_exit(&dd->dd_lock); @@ -1011,8 +1110,9 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) return (0); } +/* ARGSUSED */ static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; uint64_t *reservationp = arg2; @@ -1020,19 +1120,24 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) uint64_t used; int64_t delta; + dmu_buf_will_dirty(dd->dd_dbuf, tx); + mutex_enter(&dd->dd_lock); - used = dd->dd_used_bytes; + used = dd->dd_phys->dd_used_bytes; delta = MAX(used, new_reservation) - MAX(used, dd->dd_phys->dd_reserved); - mutex_exit(&dd->dd_lock); - - dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_reserved = new_reservation; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ - dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, + delta, 0, 0, tx); } + mutex_exit(&dd->dd_lock); + + spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa, + tx, cr, "%lld dataset = %llu", + (longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj); } int @@ -1074,7 +1179,7 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) return (delta); mutex_enter(&dd->dd_lock); - delta = parent_delta(dd, dd->dd_used_bytes, delta); + delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); mutex_exit(&dd->dd_lock); return (would_change(dd->dd_parent, delta, ancestor)); } @@ -1084,7 +1189,7 @@ struct renamearg { const char *mynewname; }; -/* ARGSUSED */ +/*ARGSUSED*/ static int dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) { @@ -1110,7 +1215,7 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) if (ra->newparent != dd->dd_parent) { /* is there enough space? */ uint64_t myspace = - MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); + MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); /* no rename into our descendant */ if (closest_common_ancestor(dd, ra->newparent) == dd) @@ -1125,7 +1230,7 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct renamearg *ra = arg2; @@ -1136,15 +1241,24 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); if (ra->newparent != dd->dd_parent) { - uint64_t myspace = - MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); - - dsl_dir_diduse_space(dd->dd_parent, -myspace, + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, + -dd->dd_phys->dd_used_bytes, -dd->dd_phys->dd_compressed_bytes, -dd->dd_phys->dd_uncompressed_bytes, tx); - dsl_dir_diduse_space(ra->newparent, myspace, + dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, + dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_compressed_bytes, dd->dd_phys->dd_uncompressed_bytes, tx); + + if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { + uint64_t unused_rsrv = dd->dd_phys->dd_reserved - + dd->dd_phys->dd_used_bytes; + + dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, + -unused_rsrv, 0, 0, tx); + dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, + unused_rsrv, 0, 0, tx); + } } dmu_buf_will_dirty(dd->dd_dbuf, tx); @@ -1164,6 +1278,9 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT3U(err, ==, 0); + + spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, + tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); } int @@ -1189,7 +1306,6 @@ dsl_dir_rename(dsl_dir_t *dd, const char *newname) goto out; } - err = dsl_sync_task_do(dd->dd_pool, dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); |