about summary refs log tree commit diff
path: root/sys/contrib/openzfs/module/zfs/zap_micro.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/zfs/zap_micro.c')
-rw-r--r-- sys/contrib/openzfs/module/zfs/zap_micro.c 467
1 files changed, 296 insertions, 171 deletions
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index 09780a6c5989..d806988af96d 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -33,7 +33,7 @@
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
-#include <sys/avl.h>
+#include <sys/btree.h>
#include <sys/arc.h>
#include <sys/dmu_objset.h>
@@ -41,6 +41,8 @@
#include <sys/sunddi.h>
#endif
+int zap_micro_max_size = MZAP_MAX_BLKSZ;
+
static int mzap_upgrade(zap_t **zapp,
const void *tag, dmu_tx_t *tx, zap_flags_t flags);
@@ -92,7 +94,7 @@ zap_hash(zap_name_t *zn)
wp++, i++) {
uint64_t word = *wp;
- for (int j = 0; j < zn->zn_key_intlen; j++) {
+ for (int j = 0; j < 8; j++) {
h = (h >> 8) ^
zfs_crc64_table[(h ^ word) & 0xFF];
word >>= NBBY;
@@ -162,18 +164,25 @@ zap_match(zap_name_t *zn, const char *matchname)
}
}
+static zap_name_t *
+zap_name_alloc(zap_t *zap)
+{
+ zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+ zn->zn_zap = zap;
+ return (zn);
+}
+
void
zap_name_free(zap_name_t *zn)
{
kmem_free(zn, sizeof (zap_name_t));
}
-zap_name_t *
-zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
+static int
+zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
{
- zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
+ zap_t *zap = zn->zn_zap;
- zn->zn_zap = zap;
zn->zn_key_intlen = sizeof (*key);
zn->zn_key_orig = key;
zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
@@ -194,17 +203,13 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
* what the hash is computed from.
*/
if (zap_normalize(zap, key, zn->zn_normbuf,
- zap->zap_normflags) != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ zap->zap_normflags) != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm = zn->zn_normbuf;
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
} else {
- if (mt != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ if (mt != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm = zn->zn_key_orig;
zn->zn_key_norm_numints = zn->zn_key_orig_numints;
}
@@ -217,13 +222,22 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
* what the matching is based on. (Not the hash!)
*/
if (zap_normalize(zap, key, zn->zn_normbuf,
- zn->zn_normflags) != 0) {
- zap_name_free(zn);
- return (NULL);
- }
+ zn->zn_normflags) != 0)
+ return (SET_ERROR(ENOTSUP));
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
}
+ return (0);
+}
+
+zap_name_t *
+zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
+{
+ zap_name_t *zn = zap_name_alloc(zap);
+ if (zap_name_init_str(zn, key, mt) != 0) {
+ zap_name_free(zn);
+ return (NULL);
+ }
return (zn);
}
@@ -271,51 +285,56 @@ zap_byteswap(void *buf, size_t size)
}
}
+__attribute__((always_inline)) inline
static int
mze_compare(const void *arg1, const void *arg2)
{
const mzap_ent_t *mze1 = arg1;
const mzap_ent_t *mze2 = arg2;
- int cmp = TREE_CMP(mze1->mze_hash, mze2->mze_hash);
- if (likely(cmp))
- return (cmp);
-
- return (TREE_CMP(mze1->mze_cd, mze2->mze_cd));
+ return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd,
+ (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
}
+ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
+ mze_compare)
+
static void
-mze_insert(zap_t *zap, int chunkid, uint64_t hash)
+mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
{
+ mzap_ent_t mze;
+
ASSERT(zap->zap_ismicro);
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
- mze->mze_chunkid = chunkid;
- mze->mze_hash = hash;
- mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
- ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
- avl_add(&zap->zap_m.zap_avl, mze);
+ mze.mze_chunkid = chunkid;
+ ASSERT0(hash & 0xffffffff);
+ mze.mze_hash = hash >> 32;
+ ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff);
+ mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd;
+ ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0);
+ zfs_btree_add(&zap->zap_m.zap_tree, &mze);
}
static mzap_ent_t *
-mze_find(zap_name_t *zn)
+mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
{
mzap_ent_t mze_tofind;
mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
+ zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree;
ASSERT(zn->zn_zap->zap_ismicro);
ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
- mze_tofind.mze_hash = zn->zn_hash;
+ ASSERT0(zn->zn_hash & 0xffffffff);
+ mze_tofind.mze_hash = zn->zn_hash >> 32;
mze_tofind.mze_cd = 0;
- mze = avl_find(avl, &mze_tofind, &idx);
+ mze = zfs_btree_find(tree, &mze_tofind, idx);
if (mze == NULL)
- mze = avl_nearest(avl, idx, AVL_AFTER);
- for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
+ mze = zfs_btree_next(tree, idx, idx);
+ for (; mze && mze->mze_hash == mze_tofind.mze_hash;
+ mze = zfs_btree_next(tree, idx, idx)) {
ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
return (mze);
@@ -328,18 +347,21 @@ static uint32_t
mze_find_unused_cd(zap_t *zap, uint64_t hash)
{
mzap_ent_t mze_tofind;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
+ zfs_btree_index_t idx;
+ zfs_btree_t *tree = &zap->zap_m.zap_tree;
ASSERT(zap->zap_ismicro);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+ ASSERT0(hash & 0xffffffff);
+ hash >>= 32;
mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;
uint32_t cd = 0;
- for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+ for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+ mze && mze->mze_hash == hash;
+ mze = zfs_btree_next(tree, &idx, &idx)) {
if (mze->mze_cd != cd)
break;
cd++;
@@ -364,16 +386,18 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
{
zap_t *zap = zn->zn_zap;
mzap_ent_t mze_tofind;
- mzap_ent_t *mze;
- avl_index_t idx;
- avl_tree_t *avl = &zap->zap_m.zap_avl;
+ zfs_btree_index_t idx;
+ zfs_btree_t *tree = &zap->zap_m.zap_tree;
uint32_t mzap_ents = 0;
+ ASSERT0(hash & 0xffffffff);
+ hash >>= 32;
mze_tofind.mze_hash = hash;
mze_tofind.mze_cd = 0;
- for (mze = avl_find(avl, &mze_tofind, &idx);
- mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
+ for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
+ mze && mze->mze_hash == hash;
+ mze = zfs_btree_next(tree, &idx, &idx)) {
mzap_ents++;
}
@@ -384,28 +408,14 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
}
static void
-mze_remove(zap_t *zap, mzap_ent_t *mze)
-{
- ASSERT(zap->zap_ismicro);
- ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
- avl_remove(&zap->zap_m.zap_avl, mze);
- kmem_free(mze, sizeof (mzap_ent_t));
-}
-
-static void
mze_destroy(zap_t *zap)
{
- mzap_ent_t *mze;
- void *avlcookie = NULL;
-
- while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie)))
- kmem_free(mze, sizeof (mzap_ent_t));
- avl_destroy(&zap->zap_m.zap_avl);
+ zfs_btree_clear(&zap->zap_m.zap_tree);
+ zfs_btree_destroy(&zap->zap_m.zap_tree);
}
static zap_t *
-mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
+mzap_open(dmu_buf_t *db)
{
zap_t *winner;
uint64_t *zap_hdr = (uint64_t *)db->db_data;
@@ -417,8 +427,8 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL);
rw_enter(&zap->zap_rwlock, RW_WRITER);
- zap->zap_objset = os;
- zap->zap_object = obj;
+ zap->zap_objset = dmu_buf_get_objset(db);
+ zap->zap_object = db->db_object;
zap->zap_dbuf = db;
if (zap_block_type != ZBT_MICRO) {
@@ -448,21 +458,26 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap->zap_salt = zap_m_phys(zap)->mz_salt;
zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
- avl_create(&zap->zap_m.zap_avl, mze_compare,
- sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
- for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
+ /*
+ * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove()
+ * overhead on massive inserts below. It still allows storing
+ * 62 entries before we have to add a 2KB B-tree core node.
+ */
+ zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
+ mze_find_in_buf, sizeof (mzap_ent_t), 512);
+
+ zap_name_t *zn = zap_name_alloc(zap);
+ for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
mzap_ent_phys_t *mze =
&zap_m_phys(zap)->mz_chunk[i];
if (mze->mze_name[0]) {
- zap_name_t *zn;
-
zap->zap_m.zap_num_entries++;
- zn = zap_name_alloc(zap, mze->mze_name, 0);
+ zap_name_init_str(zn, mze->mze_name, 0);
mze_insert(zap, i, zn->zn_hash);
- zap_name_free(zn);
}
}
+ zap_name_free(zn);
} else {
zap->zap_salt = zap_f_phys(zap)->zap_salt;
zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
@@ -503,7 +518,7 @@ handle_winner:
* have the specified tag.
*/
static int
-zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
+zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
{
ASSERT0(db->db_offset);
@@ -513,13 +528,13 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
*zapp = NULL;
- dmu_object_info_from_db(db, &doi);
+ dmu_object_info_from_dnode(dn, &doi);
if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
return (SET_ERROR(EINVAL));
zap_t *zap = dmu_buf_get_user(db);
if (zap == NULL) {
- zap = mzap_open(os, obj, db);
+ zap = mzap_open(db);
if (zap == NULL) {
/*
* mzap_open() didn't like what it saw on-disk.
@@ -548,6 +563,7 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
}
zap->zap_objset = os;
+ zap->zap_dnode = dn;
if (lt == RW_WRITER)
dmu_buf_will_dirty(db, tx);
@@ -559,7 +575,7 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
if (zap->zap_ismicro && tx && adding &&
zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
- if (newsz > MZAP_MAX_BLKSZ) {
+ if (newsz > zap_micro_max_size) {
dprintf("upgrading obj %llu: num_entries=%u\n",
(u_longlong_t)obj, zap->zap_m.zap_num_entries);
*zapp = zap;
@@ -583,23 +599,16 @@ zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
zap_t **zapp)
{
dmu_buf_t *db;
+ int err;
- int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
- if (err != 0) {
+ err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
+ if (err != 0)
return (err);
- }
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(db, &doi);
- ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
- }
-#endif
-
- err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
- if (err != 0) {
+ err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
+ if (err != 0)
dmu_buf_rele(db, tag);
- }
+ else
+ VERIFY(dnode_add_ref(dn, tag));
return (err);
}
@@ -608,21 +617,23 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
zap_t **zapp)
{
+ dnode_t *dn;
dmu_buf_t *db;
+ int err;
- int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
+ err = dnode_hold(os, obj, tag, &dn);
if (err != 0)
return (err);
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(db, &doi);
- ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
+ err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
+ if (err != 0) {
+ dnode_rele(dn, tag);
+ return (err);
}
-#endif
- err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
- if (err != 0)
+ err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
+ if (err != 0) {
dmu_buf_rele(db, tag);
+ dnode_rele(dn, tag);
+ }
return (err);
}
@@ -630,6 +641,7 @@ void
zap_unlockdir(zap_t *zap, const void *tag)
{
rw_exit(&zap->zap_rwlock);
+ dnode_rele(zap->zap_dnode, tag);
dmu_buf_rele(zap->zap_dbuf, tag);
}
@@ -657,24 +669,25 @@ mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
dprintf("upgrading obj=%llu with %u chunks\n",
(u_longlong_t)zap->zap_object, nchunks);
- /* XXX destroy the avl later, so we can use the stored hash value */
+ /* XXX destroy the tree later, so we can use the stored hash value */
mze_destroy(zap);
fzap_upgrade(zap, tx, flags);
+ zap_name_t *zn = zap_name_alloc(zap);
for (int i = 0; i < nchunks; i++) {
mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
if (mze->mze_name[0] == 0)
continue;
dprintf("adding %s=%llu\n",
mze->mze_name, (u_longlong_t)mze->mze_value);
- zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0);
+ zap_name_init_str(zn, mze->mze_name, 0);
/* If we fail here, we would end up losing entries */
VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
tag, tx));
zap = zn->zn_zap; /* fzap_add_cd() may change zap */
- zap_name_free(zn);
}
+ zap_name_free(zn);
vmem_free(mzp, sz);
*zapp = zap;
return (0);
@@ -714,7 +727,8 @@ mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx)
if (flags != 0) {
zap_t *zap;
/* Only fat zap supports flags; upgrade immediately. */
- VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER,
+ VERIFY(dnode_add_ref(dn, FTAG));
+ VERIFY0(zap_lockdir_impl(dn, db, FTAG, tx, RW_WRITER,
B_FALSE, B_FALSE, &zap));
VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags));
zap_unlockdir(zap, FTAG);
@@ -916,22 +930,23 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
* See also the comment above zap_entry_normalization_conflict().
*/
static boolean_t
-mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
+mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
+ zfs_btree_index_t *idx)
{
- int direction = AVL_BEFORE;
boolean_t allocdzn = B_FALSE;
+ mzap_ent_t *other;
+ zfs_btree_index_t oidx;
if (zap->zap_normflags == 0)
return (B_FALSE);
-again:
- for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
+ for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx);
other && other->mze_hash == mze->mze_hash;
- other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
+ other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) {
if (zn == NULL) {
- zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
- MT_NORMALIZE);
+ zn = zap_name_alloc_str(zap,
+ MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
@@ -941,9 +956,20 @@ again:
}
}
- if (direction == AVL_BEFORE) {
- direction = AVL_AFTER;
- goto again;
+ for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx);
+ other && other->mze_hash == mze->mze_hash;
+ other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) {
+
+ if (zn == NULL) {
+ zn = zap_name_alloc_str(zap,
+ MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
+ allocdzn = B_TRUE;
+ }
+ if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
+ if (allocdzn)
+ zap_name_free(zn);
+ return (B_TRUE);
+ }
}
if (allocdzn)
@@ -971,7 +997,7 @@ zap_lookup_impl(zap_t *zap, const char *name,
{
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, name, mt);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
if (zn == NULL)
return (SET_ERROR(ENOTSUP));
@@ -979,7 +1005,8 @@ zap_lookup_impl(zap_t *zap, const char *name,
err = fzap_lookup(zn, integer_size, num_integers, buf,
realname, rn_len, ncp);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
@@ -990,11 +1017,13 @@ zap_lookup_impl(zap_t *zap, const char *name,
} else {
*(uint64_t *)buf =
MZE_PHYS(zap, mze)->mze_value;
- (void) strlcpy(realname,
- MZE_PHYS(zap, mze)->mze_name, rn_len);
+ if (realname != NULL)
+ (void) strlcpy(realname,
+ MZE_PHYS(zap, mze)->mze_name,
+ rn_len);
if (ncp) {
*ncp = mzap_normalization_conflict(zap,
- zn, mze);
+ zn, mze, &idx);
}
}
}
@@ -1031,7 +1060,7 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
- zn = zap_name_alloc(zap, name, 0);
+ zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1134,7 +1163,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc(zap, name, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1142,7 +1171,8 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
if (!zap->zap_ismicro) {
err = fzap_length(zn, integer_size, num_integers);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
@@ -1182,7 +1212,7 @@ static void
mzap_addent(zap_name_t *zn, uint64_t value)
{
zap_t *zap = zn->zn_zap;
- int start = zap->zap_m.zap_alloc_next;
+ uint16_t start = zap->zap_m.zap_alloc_next;
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
@@ -1198,7 +1228,7 @@ mzap_addent(zap_name_t *zn, uint64_t value)
ASSERT(cd < zap_maxcd(zap));
again:
- for (int i = start; i < zap->zap_m.zap_num_chunks; i++) {
+ for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) {
mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
if (mze->mze_name[0] == 0) {
mze->mze_value = value;
@@ -1229,7 +1259,7 @@ zap_add_impl(zap_t *zap, const char *key,
const uint64_t *intval = val;
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, key, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
if (zn == NULL) {
zap_unlockdir(zap, tag);
return (SET_ERROR(ENOTSUP));
@@ -1247,7 +1277,8 @@ zap_add_impl(zap_t *zap, const char *key,
}
zap = zn->zn_zap; /* fzap_add() may change zap */
} else {
- if (mze_find(zn) != NULL) {
+ zfs_btree_index_t idx;
+ if (mze_find(zn, &idx) != NULL) {
err = SET_ERROR(EEXIST);
} else {
mzap_addent(zn, *intval);
@@ -1292,6 +1323,26 @@ zap_add_by_dnode(dnode_t *dn, const char *key,
return (err);
}
+static int
+zap_add_uint64_impl(zap_t *zap, const uint64_t *key,
+ int key_numints, int integer_size, uint64_t num_integers,
+ const void *val, dmu_tx_t *tx, const void *tag)
+{
+ int err;
+
+ zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+ if (zn == NULL) {
+ zap_unlockdir(zap, tag);
+ return (SET_ERROR(ENOTSUP));
+ }
+ err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
+ zap = zn->zn_zap; /* fzap_add() may change zap */
+ zap_name_free(zn);
+ if (zap != NULL) /* may be NULL if fzap_add() failed */
+ zap_unlockdir(zap, tag);
+ return (err);
+}
+
int
zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
@@ -1303,16 +1354,26 @@ zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
- if (zn == NULL) {
- zap_unlockdir(zap, FTAG);
- return (SET_ERROR(ENOTSUP));
- }
- err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
- zap = zn->zn_zap; /* fzap_add() may change zap */
- zap_name_free(zn);
- if (zap != NULL) /* may be NULL if fzap_add() failed */
- zap_unlockdir(zap, FTAG);
+ err = zap_add_uint64_impl(zap, key, key_numints,
+ integer_size, num_integers, val, tx, FTAG);
+ /* zap_add_uint64_impl() calls zap_unlockdir() */
+ return (err);
+}
+
+int
+zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+ int key_numints, int integer_size, uint64_t num_integers,
+ const void *val, dmu_tx_t *tx)
+{
+ zap_t *zap;
+
+ int err =
+ zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_add_uint64_impl(zap, key, key_numints,
+ integer_size, num_integers, val, tx, FTAG);
+ /* zap_add_uint64_impl() calls zap_unlockdir() */
return (err);
}
@@ -1327,7 +1388,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc(zap, name, 0);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
if (zn == NULL) {
zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
@@ -1348,7 +1409,8 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
}
zap = zn->zn_zap; /* fzap_update() may change zap */
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze != NULL) {
MZE_PHYS(zap, mze)->mze_value = *intval;
} else {
@@ -1362,27 +1424,56 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
return (err);
}
-int
-zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
- int key_numints,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
+static int
+zap_update_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
+ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx,
+ const void *tag)
{
- zap_t *zap;
+ int err;
- int err =
- zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
- if (err != 0)
- return (err);
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap, FTAG);
+ zap_unlockdir(zap, tag);
return (SET_ERROR(ENOTSUP));
}
- err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
+ err = fzap_update(zn, integer_size, num_integers, val, tag, tx);
zap = zn->zn_zap; /* fzap_update() may change zap */
zap_name_free(zn);
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
- zap_unlockdir(zap, FTAG);
+ zap_unlockdir(zap, tag);
+ return (err);
+}
+
+int
+zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+ int key_numints, int integer_size, uint64_t num_integers, const void *val,
+ dmu_tx_t *tx)
+{
+ zap_t *zap;
+
+ int err =
+ zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_update_uint64_impl(zap, key, key_numints,
+ integer_size, num_integers, val, tx, FTAG);
+ /* zap_update_uint64_impl() calls zap_unlockdir() */
+ return (err);
+}
+
+int
+zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
+ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
+{
+ zap_t *zap;
+
+ int err =
+ zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_update_uint64_impl(zap, key, key_numints,
+ integer_size, num_integers, val, tx, FTAG);
+ /* zap_update_uint64_impl() calls zap_unlockdir() */
return (err);
}
@@ -1398,20 +1489,20 @@ zap_remove_impl(zap_t *zap, const char *name,
{
int err = 0;
- zap_name_t *zn = zap_name_alloc(zap, name, mt);
+ zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
if (zn == NULL)
return (SET_ERROR(ENOTSUP));
if (!zap->zap_ismicro) {
err = fzap_remove(zn, tx);
} else {
- mzap_ent_t *mze = mze_find(zn);
+ zfs_btree_index_t idx;
+ mzap_ent_t *mze = mze_find(zn, &idx);
if (mze == NULL) {
err = SET_ERROR(ENOENT);
} else {
zap->zap_m.zap_num_entries--;
- memset(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 0,
- sizeof (mzap_ent_phys_t));
- mze_remove(zap, mze);
+ memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
+ zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
}
}
zap_name_free(zn);
@@ -1447,6 +1538,23 @@ zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
return (err);
}
+static int
+zap_remove_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
+ dmu_tx_t *tx, const void *tag)
+{
+ int err;
+
+ zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+ if (zn == NULL) {
+ zap_unlockdir(zap, tag);
+ return (SET_ERROR(ENOTSUP));
+ }
+ err = fzap_remove(zn, tx);
+ zap_name_free(zn);
+ zap_unlockdir(zap, tag);
+ return (err);
+}
+
int
zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx)
@@ -1457,14 +1565,23 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
if (err != 0)
return (err);
- zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
- if (zn == NULL) {
- zap_unlockdir(zap, FTAG);
- return (SET_ERROR(ENOTSUP));
- }
- err = fzap_remove(zn, tx);
- zap_name_free(zn);
- zap_unlockdir(zap, FTAG);
+ err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
+ /* zap_remove_uint64_impl() calls zap_unlockdir() */
+ return (err);
+}
+
+int
+zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
+ dmu_tx_t *tx)
+{
+ zap_t *zap;
+
+ int err =
+ zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
+ /* zap_remove_uint64_impl() calls zap_unlockdir() */
return (err);
}
@@ -1582,29 +1699,30 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
if (!zc->zc_zap->zap_ismicro) {
err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
} else {
- avl_index_t idx;
+ zfs_btree_index_t idx;
mzap_ent_t mze_tofind;
- mze_tofind.mze_hash = zc->zc_hash;
+ mze_tofind.mze_hash = zc->zc_hash >> 32;
mze_tofind.mze_cd = zc->zc_cd;
- mzap_ent_t *mze =
- avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
+ mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
+ &mze_tofind, &idx);
if (mze == NULL) {
- mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
- idx, AVL_AFTER);
+ mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
+ &idx, &idx);
}
if (mze) {
mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
za->za_normalization_conflict =
- mzap_normalization_conflict(zc->zc_zap, NULL, mze);
+ mzap_normalization_conflict(zc->zc_zap, NULL,
+ mze, &idx);
za->za_integer_length = 8;
za->za_num_integers = 1;
za->za_first_integer = mzep->mze_value;
(void) strlcpy(za->za_name, mzep->mze_name,
sizeof (za->za_name));
- zc->zc_hash = mze->mze_hash;
+ zc->zc_hash = (uint64_t)mze->mze_hash << 32;
zc->zc_cd = mze->mze_cd;
err = 0;
} else {
@@ -1669,14 +1787,17 @@ EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64);
+EXPORT_SYMBOL(zap_add_uint64_by_dnode);
EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64);
+EXPORT_SYMBOL(zap_update_uint64_by_dnode);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_by_dnode);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);
+EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join);
@@ -1695,4 +1816,8 @@ EXPORT_SYMBOL(zap_cursor_advance);
EXPORT_SYMBOL(zap_cursor_serialize);
EXPORT_SYMBOL(zap_cursor_init_serialized);
EXPORT_SYMBOL(zap_get_stats);
+
+/* CSTYLED */
+ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW,
+ "Maximum micro ZAP size, before converting to a fat ZAP, in bytes");
#endif