aboutsummaryrefslogtreecommitdiff
path: root/cddl
diff options
context:
space:
mode:
authorAndriy Gapon <avg@FreeBSD.org>2019-10-07 08:14:45 +0000
committerAndriy Gapon <avg@FreeBSD.org>2019-10-07 08:14:45 +0000
commit862c20fd89e6153d74f136d9973a21de636ee5a4 (patch)
treed8b638e1bc5eb820daf3e11fbf9f36f02228867b /cddl
parent0a457350fd141cd3e44f39eaea547f81b81e5df0 (diff)
parent9aab037f9285496e41e79c4d8c5d456acb8ec137 (diff)
downloadsrc-862c20fd89e6153d74f136d9973a21de636ee5a4.tar.gz
src-862c20fd89e6153d74f136d9973a21de636ee5a4.zip
MFV r350898, r351075: 8423 8199 7432 Implement large_dnode pool feature
8423 8199 7432 Implement large_dnode pool feature 7432 Large dnode pool feature 8199 multi-threaded dmu_object_alloc() 8423 Implement large_dnode pool feature 10406 large_dnode changes broke zfs recv of legacy stream llumos/illumos-gate@54811da5ac6b517992fdc173df5d605e4e61fdc0 https://github.com/illumos/illumos-gate/commit/54811da5ac6b517992fdc173df5d605e4e61fdc0 https://www.illumos.org/issues/8423 https://www.illumos.org/issues/8199 https://www.illumos.org/issues/7432 illumos/illumos-gate@811964cd9f1fbae0fc3b93d116269e9b1fca090a https://github.com/illumos/illumos-gate/commit/811964cd9f1fbae0fc3b93d116269e9b1fca090a https://www.illumos.org/issues/10406 ZoL issues: Improved dnode allocation #6564 Clean up large dnode code #6262 Fix dnode_hold() freeing dnode behavior #8172 Fix dnode allocation race #6414, #6439 Partial: Raw sends must be able to decrease nlevels #6821, #6864 Remove unnecessary txg syncs from receive_object() Closes #7197 This updates FreeBSD large_dnode code (that was imported from ZoL) to a version that was committed to illumos. It has some cleanups, improvements and fixes comparing to what we have in FreeBSD now. I think that the most significant update is 8199 multi-threaded dmu_object_alloc(). This commit reverts r351077 that was a revert of r351074 and r351076 and restores those changes. Required atomic operations should be available now on all platforms where we build ZFS. Obtained from: illumos MFC after: 3 weeks
Notes
Notes: svn path=/head/; revision=353176
Diffstat (limited to 'cddl')
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb.c43
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb_il.c18
-rw-r--r--cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c10
-rw-r--r--cddl/contrib/opensolaris/cmd/ztest/ztest.c56
4 files changed, 93 insertions, 34 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index 68d75fcbcdb4..f02c2af5ef5f 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -2134,7 +2134,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
};
static void
-dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
+dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
+ uint64_t *dnode_slots_used)
{
dmu_buf_t *db = NULL;
dmu_object_info_t doi;
@@ -2154,7 +2155,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
if (*print_header) {
- (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
+ (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
"Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
"lsize", "%full", "type");
*print_header = 0;
@@ -2173,6 +2174,9 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
dmu_object_info_from_dnode(dn, &doi);
+ if (dnode_slots_used != NULL)
+ *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
+
zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
@@ -2195,8 +2199,9 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
ZDB_COMPRESS_NAME(doi.doi_compress));
}
- (void) printf("%10lld %3u %5s %5s %5s %6s %5s %6s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
+ (void) printf("%10" PRIu64
+ " %3u %5s %5s %5s %5s %5s %6s %s%s\n",
+ object, doi.doi_indirection, iblk, dblk,
asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
@@ -2305,6 +2310,9 @@ dump_dir(objset_t *os)
int print_header = 1;
unsigned i;
int error;
+ uint64_t total_slots_used = 0;
+ uint64_t max_slot_used = 0;
+ uint64_t dnode_slots;
/* make sure nicenum has enough space */
CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
@@ -2349,7 +2357,7 @@ dump_dir(objset_t *os)
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
- &print_header);
+ &print_header, NULL);
(void) printf("\n");
return;
}
@@ -2374,22 +2382,37 @@ dump_dir(objset_t *os)
if (BP_IS_HOLE(os->os_rootbp))
return;
- dump_object(os, 0, verbosity, &print_header);
+ dump_object(os, 0, verbosity, &print_header, NULL);
object_count = 0;
if (DMU_USERUSED_DNODE(os) != NULL &&
DMU_USERUSED_DNODE(os)->dn_type != 0) {
- dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
- dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
+ dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
+ NULL);
+ dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
+ NULL);
}
object = 0;
while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
- dump_object(os, object, verbosity, &print_header);
+ dump_object(os, object, verbosity, &print_header, &dnode_slots);
object_count++;
+ total_slots_used += dnode_slots;
+ max_slot_used = object + dnode_slots - 1;
}
(void) printf("\n");
+ (void) printf(" Dnode slots:\n");
+ (void) printf("\tTotal used: %10llu\n",
+ (u_longlong_t)total_slots_used);
+ (void) printf("\tMax used: %10llu\n",
+ (u_longlong_t)max_slot_used);
+ (void) printf("\tPercent empty: %10lf\n",
+ (double)(max_slot_used - total_slots_used)*100 /
+ (double)max_slot_used);
+
+ (void) printf("\n");
+
if (error != ESRCH) {
(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
abort();
@@ -2581,7 +2604,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
return (dump_path_impl(os, child_obj, s + 1));
/*FALLTHROUGH*/
case DMU_OT_PLAIN_FILE_CONTENTS:
- dump_object(os, child_obj, dump_opt['v'], &header);
+ dump_object(os, child_obj, dump_opt['v'], &header, NULL);
return (0);
default:
(void) fprintf(stderr, "object %llu has non-file/directory "
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
index 75b0cd91d262..9f3f23f82da1 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -84,15 +84,15 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, void *arg)
}
(void) printf("%s%s", tab_prefix, ctime(&crtime));
- (void) printf("%sdoid %llu, foid %llu, slots %llu, mode %llo\n", tab_prefix,
- (u_longlong_t)lr->lr_doid,
- (u_longlong_t)LR_FOID_GET_OBJ(lr->lr_foid),
- (u_longlong_t)LR_FOID_GET_SLOTS(lr->lr_foid),
- (longlong_t)lr->lr_mode);
- (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
- tab_prefix,
- (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
- (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
+ (void) printf("%sdoid %" PRIu64 ", foid %" PRIu64 ", slots %" PRIu64
+ ", mode %" PRIo64 "\n",
+ tab_prefix, lr->lr_doid,
+ (uint64_t)LR_FOID_GET_OBJ(lr->lr_foid),
+ (uint64_t)LR_FOID_GET_SLOTS(lr->lr_foid),
+ lr->lr_mode);
+ (void) printf("%suid %" PRIu64 ", gid %" PRIu64 ", gen %" PRIu64
+ ", rdev %#" PRIx64 "\n",
+ tab_prefix, lr->lr_uid, lr->lr_gid, lr->lr_gen, lr->lr_rdev);
}
/* ARGSUSED */
diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
index 54edb566ad2f..51c4c8e0e649 100644
--- a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
+++ b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
@@ -416,13 +416,15 @@ main(int argc, char *argv[])
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
}
if (verbose) {
- (void) printf("OBJECT object = %llu type = %u "
- "bonustype = %u blksz = %u bonuslen = %u\n",
- (u_longlong_t)drro->drr_object,
+ (void) printf("OBJECT object = %" PRIu64
+ " type = %u bonustype = %u blksz = %u"
+ " bonuslen = %u dn_slots = %u\n",
+ drro->drr_object,
drro->drr_type,
drro->drr_bonustype,
drro->drr_blksz,
- drro->drr_bonuslen);
+ drro->drr_bonuslen,
+ drro->drr_dn_slots);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index 37acf34ec369..538fd040c95e 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -196,6 +196,7 @@ extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
+extern int dmu_object_alloc_chunk_shift;
extern boolean_t zfs_force_some_double_word_sm_entries;
static ztest_shared_opts_t *ztest_shared_opts;
@@ -322,6 +323,7 @@ static ztest_shared_callstate_t *ztest_shared_callstate;
ztest_func_t ztest_dmu_read_write;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
+ztest_func_t ztest_dmu_object_next_chunk;
ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
ztest_func_t ztest_zap_parallel;
@@ -363,6 +365,7 @@ ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
{ ztest_dmu_write_parallel, 10, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
+ { ztest_dmu_object_next_chunk, 1, &zopt_sometimes },
{ ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
@@ -1366,7 +1369,7 @@ ztest_bt_bonus(dmu_buf_t *db)
* it unique to the object, generation, and offset to verify that data
* is not getting overwritten by data from other dnodes.
*/
-#define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
+#define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
(((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset))
/*
@@ -1895,6 +1898,7 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
txg, crtxg);
ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen);
+
dmu_buf_rele(db, FTAG);
(void) ztest_log_setattr(zd, tx, lr);
@@ -3815,8 +3819,10 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
ztest_od_t od[4];
int batchsize = sizeof (od) / sizeof (od[0]);
- for (int b = 0; b < batchsize; b++)
- ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0, 0);
+ for (int b = 0; b < batchsize; b++) {
+ ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER,
+ 0, 0, 0);
+ }
/*
* Destroy the previous batch of objects, create a new batch,
@@ -3831,6 +3837,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
}
/*
+ * Rewind the global allocator to verify object allocation backfilling.
+ */
+void
+ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id)
+{
+ objset_t *os = zd->zd_os;
+ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
+ uint64_t object;
+
+ /*
+ * Rewind the global allocator randomly back to a lower object number
+ * to force backfilling and reclamation of recently freed dnodes.
+ */
+ mutex_enter(&os->os_obj_lock);
+ object = ztest_random(os->os_obj_next_chunk);
+ os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk);
+ mutex_exit(&os->os_obj_lock);
+}
+
+/*
* Verify that dmu_{read,write} work as expected.
*/
void
@@ -3876,8 +3902,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
/*
* Read the directory info. If it's the first time, set things up.
*/
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
- ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4146,8 +4174,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
/*
* Read the directory info. If it's the first time, set things up.
*/
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
- ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4347,7 +4377,8 @@ ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
* to verify that parallel writes to an object -- even to the
* same blocks within the object -- doesn't cause any trouble.
*/
- ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER,
+ 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4366,7 +4397,8 @@ ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
uint64_t blocksize = ztest_random_blocksize();
void *data;
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
return;
@@ -4590,7 +4622,8 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
char name[20], string_value[20];
void *data;
- ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0, 0);
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER,
+ 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -5411,7 +5444,8 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
blocksize = ztest_random_blocksize();
blocksize = MIN(blocksize, 2048); /* because we write so many */
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;