aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c496
1 files changed, 306 insertions, 190 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
index 5e563b632909..48d87f97f669 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
@@ -19,26 +19,28 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
+#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
#include <sys/zio.h>
#include <sys/arc.h>
+#include <sys/sunddi.h>
#include "zfs_namecheck.h"
-static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd);
-static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
+static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
+static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
+ cred_t *cr, dmu_tx_t *tx);
/* ARGSUSED */
@@ -55,8 +57,6 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
ASSERT(dd->dd_space_towrite[t] == 0);
}
- ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes);
-
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
@@ -91,9 +91,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dmu_object_info_t doi;
dmu_object_info_from_db(dbuf, &doi);
ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
+ ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
}
#endif
- /* XXX assert bonus buffer size is correct */
if (dd == NULL) {
dsl_dir_t *winner;
int err;
@@ -103,7 +103,6 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_dbuf = dbuf;
dd->dd_pool = dp;
dd->dd_phys = dbuf->db_data;
- dd->dd_used_bytes = dd->dd_phys->dd_used_bytes;
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
@@ -112,36 +111,25 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
if (dd->dd_phys->dd_parent_obj) {
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
NULL, dd, &dd->dd_parent);
- if (err) {
- mutex_destroy(&dd->dd_lock);
- kmem_free(dd, sizeof (dsl_dir_t));
- dmu_buf_rele(dbuf, tag);
- return (err);
- }
+ if (err)
+ goto errout;
if (tail) {
#ifdef ZFS_DEBUG
uint64_t foundobj;
err = zap_lookup(dp->dp_meta_objset,
- dd->dd_parent->dd_phys->
- dd_child_dir_zapobj,
+ dd->dd_parent->dd_phys->dd_child_dir_zapobj,
tail, sizeof (foundobj), 1, &foundobj);
ASSERT(err || foundobj == ddobj);
#endif
(void) strcpy(dd->dd_myname, tail);
} else {
err = zap_value_search(dp->dp_meta_objset,
- dd->dd_parent->dd_phys->
- dd_child_dir_zapobj,
- ddobj, dd->dd_myname);
- }
- if (err) {
- dsl_dir_close(dd->dd_parent, dd);
- mutex_destroy(&dd->dd_lock);
- kmem_free(dd, sizeof (dsl_dir_t));
- dmu_buf_rele(dbuf, tag);
- return (err);
+ dd->dd_parent->dd_phys->dd_child_dir_zapobj,
+ ddobj, 0, dd->dd_myname);
}
+ if (err)
+ goto errout;
} else {
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
}
@@ -174,6 +162,15 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
ASSERT3P(dd->dd_dbuf, ==, dbuf);
*ddp = dd;
return (0);
+
+errout:
+ if (dd->dd_parent)
+ dsl_dir_close(dd->dd_parent, dd);
+ mutex_destroy(&dd->dd_lock);
+ kmem_free(dd, sizeof (dsl_dir_t));
+ dmu_buf_rele(dbuf, tag);
+ return (err);
+
}
void
@@ -404,27 +401,37 @@ dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
}
uint64_t
-dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx)
+dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
+ dmu_tx_t *tx)
{
- objset_t *mos = pds->dd_pool->dp_meta_objset;
+ objset_t *mos = dp->dp_meta_objset;
uint64_t ddobj;
dsl_dir_phys_t *dsphys;
dmu_buf_t *dbuf;
ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
- VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
- name, sizeof (uint64_t), 1, &ddobj, tx));
+ if (pds) {
+ VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
+ name, sizeof (uint64_t), 1, &ddobj, tx));
+ } else {
+ /* it's the root dir */
+ VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
+ }
VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
dmu_buf_will_dirty(dbuf, tx);
dsphys = dbuf->db_data;
dsphys->dd_creation_time = gethrestime_sec();
- dsphys->dd_parent_obj = pds->dd_object;
+ if (pds)
+ dsphys->dd_parent_obj = pds->dd_object;
dsphys->dd_props_zapobj = zap_create(mos,
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
dsphys->dd_child_dir_zapobj = zap_create(mos,
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
+ dsphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
dmu_buf_rele(dbuf, FTAG);
return (ddobj);
@@ -461,23 +468,27 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
void
-dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
+dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
objset_t *mos = dd->dd_pool->dp_meta_objset;
uint64_t val, obj;
+ dd_used_t t;
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
/* Remove our reservation. */
val = 0;
- dsl_dir_set_reservation_sync(dd, &val, tx);
- ASSERT3U(dd->dd_used_bytes, ==, 0);
+ dsl_dir_set_reservation_sync(dd, &val, cr, tx);
+ ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
+ for (t = 0; t < DD_USED_NUM; t++)
+ ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0);
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
+ VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
VERIFY(0 == zap_remove(mos,
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
@@ -486,65 +497,53 @@ dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
VERIFY(0 == dmu_object_free(mos, obj, tx));
}
-void
-dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx)
+boolean_t
+dsl_dir_is_clone(dsl_dir_t *dd)
{
- dsl_dir_phys_t *dsp;
- dmu_buf_t *dbuf;
- int error;
-
- *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
- DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
-
- error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET,
- sizeof (uint64_t), 1, ddobjp, tx);
- ASSERT3U(error, ==, 0);
-
- VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf));
- dmu_buf_will_dirty(dbuf, tx);
- dsp = dbuf->db_data;
-
- dsp->dd_creation_time = gethrestime_sec();
- dsp->dd_props_zapobj = zap_create(mos,
- DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
- dsp->dd_child_dir_zapobj = zap_create(mos,
- DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
-
- dmu_buf_rele(dbuf, FTAG);
+ return (dd->dd_phys->dd_origin_obj &&
+ (dd->dd_pool->dp_origin_snap == NULL ||
+ dd->dd_phys->dd_origin_obj !=
+ dd->dd_pool->dp_origin_snap->ds_object));
}
void
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
{
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
- dsl_dir_space_available(dd, NULL, 0, TRUE));
-
mutex_enter(&dd->dd_lock);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
- dd->dd_phys->dd_quota);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
+ dd->dd_phys->dd_used_bytes);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
dd->dd_phys->dd_reserved);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
(dd->dd_phys->dd_uncompressed_bytes * 100 /
dd->dd_phys->dd_compressed_bytes));
+ if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
+ dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
+ dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
+ dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
+ dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
+ dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
+ }
mutex_exit(&dd->dd_lock);
- if (dd->dd_phys->dd_clone_parent_obj) {
+ rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
+ if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
char buf[MAXNAMELEN];
- rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
- VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
- dd->dd_phys->dd_clone_parent_obj,
- NULL, DS_MODE_NONE, FTAG, &ds));
+ VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
+ dd->dd_phys->dd_origin_obj, FTAG, &ds));
dsl_dataset_name(ds, buf);
- dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
- rw_exit(&dd->dd_pool->dp_config_rwlock);
-
+ dsl_dataset_rele(ds, FTAG);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
}
+ rw_exit(&dd->dd_pool->dp_config_rwlock);
}
void
@@ -580,7 +579,6 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
- dd->dd_phys->dd_used_bytes = dd->dd_used_bytes;
mutex_exit(&dd->dd_lock);
/* release the hold from dsl_dir_dirty */
@@ -588,15 +586,13 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
}
static uint64_t
-dsl_dir_estimated_space(dsl_dir_t *dd)
+dsl_dir_space_towrite(dsl_dir_t *dd)
{
- int64_t space;
+ uint64_t space = 0;
int i;
ASSERT(MUTEX_HELD(&dd->dd_lock));
- space = dd->dd_phys->dd_used_bytes;
- ASSERT(space >= 0);
for (i = 0; i < TXG_SIZE; i++) {
space += dd->dd_space_towrite[i&TXG_MASK];
ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
@@ -630,13 +626,9 @@ dsl_dir_space_available(dsl_dir_t *dd,
mutex_enter(&dd->dd_lock);
if (dd->dd_phys->dd_quota != 0)
quota = dd->dd_phys->dd_quota;
- if (ondiskonly) {
- used = dd->dd_used_bytes;
- } else {
- used = dsl_dir_estimated_space(dd);
- }
- if (dd == ancestor)
- used += delta;
+ used = dd->dd_phys->dd_used_bytes;
+ if (!ondiskonly)
+ used += dsl_dir_space_towrite(dd);
if (dd->dd_parent == NULL) {
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
@@ -651,6 +643,14 @@ dsl_dir_space_available(dsl_dir_t *dd,
parentspace += dd->dd_phys->dd_reserved - used;
}
+ if (dd == ancestor) {
+ ASSERT(delta <= 0);
+ ASSERT(used >= -delta);
+ used += delta;
+ if (parentspace != UINT64_MAX)
+ parentspace -= delta;
+ }
+
if (used > quota) {
/* over quota */
myspace = 0;
@@ -678,50 +678,68 @@ dsl_dir_space_available(dsl_dir_t *dd,
struct tempreserve {
list_node_t tr_node;
+ dsl_pool_t *tr_dp;
dsl_dir_t *tr_ds;
uint64_t tr_size;
};
-/*
- * Reserve space in this dsl_dir, to be used in this tx's txg.
- * After the space has been dirtied (and thus
- * dsl_dir_willuse_space() has been called), the reservation should
- * be canceled, using dsl_dir_tempreserve_clear().
- */
static int
-dsl_dir_tempreserve_impl(dsl_dir_t *dd,
- uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx)
+dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
+ boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
+ dmu_tx_t *tx, boolean_t first)
{
uint64_t txg = tx->tx_txg;
- uint64_t est_used, quota, parent_rsrv;
- int edquot = EDQUOT;
+ uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
+ struct tempreserve *tr;
+ int enospc = EDQUOT;
int txgidx = txg & TXG_MASK;
int i;
- struct tempreserve *tr;
+ uint64_t ref_rsrv = 0;
ASSERT3U(txg, !=, 0);
- ASSERT3S(asize, >=, 0);
+ ASSERT3S(asize, >, 0);
mutex_enter(&dd->dd_lock);
+
/*
* Check against the dsl_dir's quota. We don't add in the delta
* when checking for over-quota because they get one free hit.
*/
- est_used = dsl_dir_estimated_space(dd);
+ est_inflight = dsl_dir_space_towrite(dd);
for (i = 0; i < TXG_SIZE; i++)
- est_used += dd->dd_tempreserved[i];
+ est_inflight += dd->dd_tempreserved[i];
+ used_on_disk = dd->dd_phys->dd_used_bytes;
- quota = UINT64_MAX;
+ /*
+ * On the first iteration, fetch the dataset's used-on-disk and
+ * refreservation values. Also, if checkrefquota is set, test if
+ * allocating this space would exceed the dataset's refquota.
+ */
+ if (first && tx->tx_objset) {
+ int error;
+ dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
+
+ error = dsl_dataset_check_quota(ds, checkrefquota,
+ asize, est_inflight, &used_on_disk, &ref_rsrv);
+ if (error) {
+ mutex_exit(&dd->dd_lock);
+ return (error);
+ }
+ }
- if (dd->dd_phys->dd_quota)
+ /*
+ * If this transaction will result in a net free of space,
+ * we want to let it through.
+ */
+ if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
+ quota = UINT64_MAX;
+ else
quota = dd->dd_phys->dd_quota;
/*
- * If this transaction will result in a net free of space, we want
- * to let it through, but we have to be careful: the space that it
- * frees won't become available until *after* this txg syncs.
- * Therefore, to ensure that it's possible to remove files from
- * a full pool without inducing transient overcommits, we throttle
+ * Adjust the quota against the actual pool size at the root.
+ * To ensure that it's possible to remove files from a full
+ * pool without inducing transient overcommits, we throttle
* netfree transactions against a quota that is slightly larger,
* but still within the pool's allocation slop. In cases where
* we're very close to full, this will allow a steady trickle of
@@ -731,47 +749,45 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd,
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
if (poolsize < quota) {
quota = poolsize;
- edquot = ENOSPC;
+ enospc = ENOSPC;
}
- } else if (netfree) {
- quota = UINT64_MAX;
}
/*
* If they are requesting more space, and our current estimate
- * is over quota. They get to try again unless the actual
+ * is over quota, they get to try again unless the actual
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
- if (asize > 0 && est_used > quota) {
- if (dd->dd_space_towrite[txg & TXG_MASK] != 0 ||
- dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 ||
- dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 ||
- dd->dd_used_bytes < quota)
- edquot = ERESTART;
- dprintf_dd(dd, "failing: used=%lluK est_used = %lluK "
+ if (used_on_disk + est_inflight > quota) {
+ if (est_inflight > 0 || used_on_disk < quota)
+ enospc = ERESTART;
+ dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
"quota=%lluK tr=%lluK err=%d\n",
- dd->dd_used_bytes>>10, est_used>>10,
- quota>>10, asize>>10, edquot);
+ used_on_disk>>10, est_inflight>>10,
+ quota>>10, asize>>10, enospc);
mutex_exit(&dd->dd_lock);
- return (edquot);
+ return (enospc);
}
/* We need to up our estimated delta before dropping dd_lock */
dd->dd_tempreserved[txgidx] += asize;
- parent_rsrv = parent_delta(dd, est_used, asize);
+ parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
+ asize - ref_rsrv);
mutex_exit(&dd->dd_lock);
- tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
+ tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
tr->tr_ds = dd;
tr->tr_size = asize;
list_insert_tail(tr_list, tr);
/* see if it's OK with our parent */
if (dd->dd_parent && parent_rsrv) {
+ boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
+
return (dsl_dir_tempreserve_impl(dd->dd_parent,
- parent_rsrv, netfree, tr_list, tx));
+ parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
} else {
return (0);
}
@@ -779,42 +795,62 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd,
/*
* Reserve space in this dsl_dir, to be used in this tx's txg.
- * After the space has been dirtied (and thus
- * dsl_dir_willuse_space() has been called), the reservation should
- * be canceled, using dsl_dir_tempreserve_clear().
+ * After the space has been dirtied (and dsl_dir_willuse_space()
+ * has been called), the reservation should be canceled, using
+ * dsl_dir_tempreserve_clear().
*/
int
-dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize,
- uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx)
+dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
+ uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
{
- int err = 0;
+ int err;
list_t *tr_list;
+ if (asize == 0) {
+ *tr_cookiep = NULL;
+ return (0);
+ }
+
tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
list_create(tr_list, sizeof (struct tempreserve),
offsetof(struct tempreserve, tr_node));
- ASSERT3S(asize, >=, 0);
+ ASSERT3S(asize, >, 0);
ASSERT3S(fsize, >=, 0);
- err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
- tr_list, tx);
-
+ err = arc_tempreserve_space(lsize, tx->tx_txg);
if (err == 0) {
struct tempreserve *tr;
- err = arc_tempreserve_space(lsize);
- if (err == 0) {
- tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
- tr->tr_ds = NULL;
- tr->tr_size = lsize;
- list_insert_tail(tr_list, tr);
+ tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
+ tr->tr_size = lsize;
+ list_insert_tail(tr_list, tr);
+
+ err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
+ } else {
+ if (err == EAGAIN) {
+ txg_delay(dd->dd_pool, tx->tx_txg, 1);
+ err = ERESTART;
}
+ dsl_pool_memory_pressure(dd->dd_pool);
+ }
+
+ if (err == 0) {
+ struct tempreserve *tr;
+
+ tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
+ tr->tr_dp = dd->dd_pool;
+ tr->tr_size = asize;
+ list_insert_tail(tr_list, tr);
+
+ err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
+ FALSE, asize > usize, tr_list, tx, TRUE);
}
if (err)
dsl_dir_tempreserve_clear(tr_list, tx);
else
*tr_cookiep = tr_list;
+
return (err);
}
@@ -831,15 +867,20 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
ASSERT3U(tx->tx_txg, !=, 0);
+ if (tr_cookie == NULL)
+ return;
+
while (tr = list_head(tr_list)) {
- if (tr->tr_ds == NULL) {
- arc_tempreserve_clear(tr->tr_size);
- } else {
+ if (tr->tr_dp) {
+ dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
+ } else if (tr->tr_ds) {
mutex_enter(&tr->tr_ds->dd_lock);
ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
tr->tr_size);
tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
mutex_exit(&tr->tr_ds->dd_lock);
+ } else {
+ arc_tempreserve_clear(tr->tr_size);
}
list_remove(tr_list, tr);
kmem_free(tr, sizeof (struct tempreserve));
@@ -848,13 +889,8 @@ dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
kmem_free(tr_list, sizeof (list_t));
}
-/*
- * Call in open context when we think we're going to write/free space,
- * eg. when dirtying data. Be conservative (ie. OK to write less than
- * this or free more than this, but don't write more or free less).
- */
-void
-dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
+static void
+dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
{
int64_t parent_space;
uint64_t est_used;
@@ -863,7 +899,7 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
if (space > 0)
dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
- est_used = dsl_dir_estimated_space(dd);
+ est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
parent_space = parent_delta(dd, est_used, space);
mutex_exit(&dd->dd_lock);
@@ -872,39 +908,96 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
/* XXX this is potentially expensive and unnecessary... */
if (parent_space && dd->dd_parent)
- dsl_dir_willuse_space(dd->dd_parent, parent_space, tx);
+ dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
+}
+
+/*
+ * Call in open context when we think we're going to write/free space,
+ * eg. when dirtying data. Be conservative (ie. OK to write less than
+ * this or free more than this, but don't write more or free less).
+ */
+void
+dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
+{
+ dsl_pool_willuse_space(dd->dd_pool, space, tx);
+ dsl_dir_willuse_space_impl(dd, space, tx);
}
/* call from syncing context when we actually write/free space for this dd */
void
-dsl_dir_diduse_space(dsl_dir_t *dd,
+dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
{
int64_t accounted_delta;
+ boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
ASSERT(dmu_tx_is_syncing(tx));
+ ASSERT(type < DD_USED_NUM);
dsl_dir_dirty(dd, tx);
- mutex_enter(&dd->dd_lock);
- accounted_delta = parent_delta(dd, dd->dd_used_bytes, used);
- ASSERT(used >= 0 || dd->dd_used_bytes >= -used);
+ if (needlock)
+ mutex_enter(&dd->dd_lock);
+ accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
+ ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
ASSERT(compressed >= 0 ||
dd->dd_phys->dd_compressed_bytes >= -compressed);
ASSERT(uncompressed >= 0 ||
dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
- dd->dd_used_bytes += used;
+ dd->dd_phys->dd_used_bytes += used;
dd->dd_phys->dd_uncompressed_bytes += uncompressed;
dd->dd_phys->dd_compressed_bytes += compressed;
- mutex_exit(&dd->dd_lock);
+
+ if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
+ ASSERT(used > 0 ||
+ dd->dd_phys->dd_used_breakdown[type] >= -used);
+ dd->dd_phys->dd_used_breakdown[type] += used;
+#ifdef DEBUG
+ dd_used_t t;
+ uint64_t u = 0;
+ for (t = 0; t < DD_USED_NUM; t++)
+ u += dd->dd_phys->dd_used_breakdown[t];
+ ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
+#endif
+ }
+ if (needlock)
+ mutex_exit(&dd->dd_lock);
if (dd->dd_parent != NULL) {
- dsl_dir_diduse_space(dd->dd_parent,
+ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
accounted_delta, compressed, uncompressed, tx);
+ dsl_dir_transfer_space(dd->dd_parent,
+ used - accounted_delta,
+ DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
}
}
-/* ARGSUSED */
+void
+dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
+ dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
+{
+ boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
+
+ ASSERT(dmu_tx_is_syncing(tx));
+ ASSERT(oldtype < DD_USED_NUM);
+ ASSERT(newtype < DD_USED_NUM);
+
+ if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
+ return;
+
+ dsl_dir_dirty(dd, tx);
+ if (needlock)
+ mutex_enter(&dd->dd_lock);
+ ASSERT(delta > 0 ?
+ dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
+ dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
+ ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
+ dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
+ dd->dd_phys->dd_used_breakdown[newtype] += delta;
+ if (needlock)
+ mutex_exit(&dd->dd_lock);
+}
+
static int
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
@@ -921,22 +1014,22 @@ dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
/*
* If we are doing the preliminary check in open context, and
* there are pending changes, then don't fail it, since the
- * pending changes could under-estimat the amount of space to be
+ * pending changes could under-estimate the amount of space to be
* freed up.
*/
- towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] +
- dd->dd_space_towrite[2] + dd->dd_space_towrite[3];
+ towrite = dsl_dir_space_towrite(dd);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
(new_quota < dd->dd_phys->dd_reserved ||
- new_quota < dsl_dir_estimated_space(dd))) {
+ new_quota < dd->dd_phys->dd_used_bytes + towrite)) {
err = ENOSPC;
}
mutex_exit(&dd->dd_lock);
return (err);
}
+/* ARGSUSED */
static void
-dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
uint64_t *quotap = arg2;
@@ -947,6 +1040,10 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
mutex_enter(&dd->dd_lock);
dd->dd_phys->dd_quota = new_quota;
mutex_exit(&dd->dd_lock);
+
+ spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
+ tx, cr, "%lld dataset = %llu ",
+ (longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj);
}
int
@@ -958,20 +1055,22 @@ dsl_dir_set_quota(const char *ddname, uint64_t quota)
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err)
return (err);
- /*
- * If someone removes a file, then tries to set the quota, we
- * want to make sure the file freeing takes effect.
- */
- txg_wait_open(dd->dd_pool, 0);
- err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
- dsl_dir_set_quota_sync, dd, &quota, 0);
+ if (quota != dd->dd_phys->dd_quota) {
+ /*
+ * If someone removes a file, then tries to set the quota, we
+ * want to make sure the file freeing takes effect.
+ */
+ txg_wait_open(dd->dd_pool, 0);
+
+ err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
+ dsl_dir_set_quota_sync, dd, &quota, 0);
+ }
dsl_dir_close(dd, FTAG);
return (err);
}
-/* ARGSUSED */
-static int
+int
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
@@ -991,7 +1090,7 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (0);
mutex_enter(&dd->dd_lock);
- used = dd->dd_used_bytes;
+ used = dd->dd_phys->dd_used_bytes;
delta = MAX(used, new_reservation) -
MAX(used, dd->dd_phys->dd_reserved);
mutex_exit(&dd->dd_lock);
@@ -1011,8 +1110,9 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (0);
}
+/* ARGSUSED */
static void
-dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
uint64_t *reservationp = arg2;
@@ -1020,19 +1120,24 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
uint64_t used;
int64_t delta;
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+
mutex_enter(&dd->dd_lock);
- used = dd->dd_used_bytes;
+ used = dd->dd_phys->dd_used_bytes;
delta = MAX(used, new_reservation) -
MAX(used, dd->dd_phys->dd_reserved);
- mutex_exit(&dd->dd_lock);
-
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
dd->dd_phys->dd_reserved = new_reservation;
if (dd->dd_parent != NULL) {
/* Roll up this additional usage into our ancestors */
- dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx);
+ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
+ delta, 0, 0, tx);
}
+ mutex_exit(&dd->dd_lock);
+
+ spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
+ tx, cr, "%lld dataset = %llu",
+ (longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj);
}
int
@@ -1074,7 +1179,7 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
return (delta);
mutex_enter(&dd->dd_lock);
- delta = parent_delta(dd, dd->dd_used_bytes, delta);
+ delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
mutex_exit(&dd->dd_lock);
return (would_change(dd->dd_parent, delta, ancestor));
}
@@ -1084,7 +1189,7 @@ struct renamearg {
const char *mynewname;
};
-/* ARGSUSED */
+/*ARGSUSED*/
static int
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
@@ -1110,7 +1215,7 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (ra->newparent != dd->dd_parent) {
/* is there enough space? */
uint64_t myspace =
- MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
+ MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
/* no rename into our descendant */
if (closest_common_ancestor(dd, ra->newparent) == dd)
@@ -1125,7 +1230,7 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
-dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
@@ -1136,15 +1241,24 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
if (ra->newparent != dd->dd_parent) {
- uint64_t myspace =
- MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
-
- dsl_dir_diduse_space(dd->dd_parent, -myspace,
+ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
+ -dd->dd_phys->dd_used_bytes,
-dd->dd_phys->dd_compressed_bytes,
-dd->dd_phys->dd_uncompressed_bytes, tx);
- dsl_dir_diduse_space(ra->newparent, myspace,
+ dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
+ dd->dd_phys->dd_used_bytes,
dd->dd_phys->dd_compressed_bytes,
dd->dd_phys->dd_uncompressed_bytes, tx);
+
+ if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
+ uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
+ dd->dd_phys->dd_used_bytes;
+
+ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
+ -unused_rsrv, 0, 0, tx);
+ dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
+ unused_rsrv, 0, 0, tx);
+ }
}
dmu_buf_will_dirty(dd->dd_dbuf, tx);
@@ -1164,6 +1278,9 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx);
ASSERT3U(err, ==, 0);
+
+ spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa,
+ tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
}
int
@@ -1189,7 +1306,6 @@ dsl_dir_rename(dsl_dir_t *dd, const char *newname)
goto out;
}
-
err = dsl_sync_task_do(dd->dd_pool,
dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);