aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexander Motin <mav@FreeBSD.org> 2021-07-01 15:30:31 +0000
committer: GitHub <noreply@github.com> 2021-07-01 15:30:31 +0000
commit: 490c845efe3ca29eaa8aa6ea1e3f45eda72895fe (patch)
tree: 50711d7c528813a30a86e0b2bf128057d52f53bf
parent: c6d1112bf4125e5a22eb47ceb7b8cee01f0df9a1 (diff)
download: src-490c845efe3ca29eaa8aa6ea1e3f45eda72895fe.tar.gz
download: src-490c845efe3ca29eaa8aa6ea1e3f45eda72895fe.zip
Compact dbuf/buf hashes and lock arrays
With the default dbuf cache size of 1/32 of ARC, it makes no sense to have a hash table of the same size (or even bigger on Linux). Reduce it to 1/8 of ARC's one, still leaving some slack, assuming a higher I/O rate via the dbuf cache than via ARC. Remove padding from the ARC hash locks array. The idea behind padding is to avoid false sharing between locks. It would make sense if there were a limited number of very busy locks. But since we have no limit on the number, by using the same memory for more locks we can achieve even lower lock contention with the same false sharing, or we can use less memory for the same contention level. Reduce the number of hash locks from 8192 to 2048. The number is still big enough to not cause contention, but the reduced memory size improves the cache hit rate for mutex_tryenter() in the ARC eviction thread, saving about 1% of the thread time. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored-By: iXsystems, Inc. Closes #12289
-rw-r--r-- include/sys/dbuf.h | 4
-rw-r--r-- module/zfs/arc.c | 25
-rw-r--r-- module/zfs/dbuf.c | 6
3 files changed, 11 insertions, 24 deletions
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index d221eac4c816..6ae079c6a64b 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -322,12 +322,12 @@ typedef struct dmu_buf_impl {
} dmu_buf_impl_t;
/* Note: the dbuf hash table is exposed only for the mdb module */
-#define DBUF_MUTEXES 8192
+#define DBUF_MUTEXES 2048
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table;
- kmutex_t hash_mutexes[DBUF_MUTEXES];
+ kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
} dbuf_hash_table_t;
typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 3484fff3b4d4..394ca1bfe42d 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -740,29 +740,18 @@ taskq_t *arc_prune_taskq;
* Hash table routines
*/
-#define HT_LOCK_ALIGN 64
-#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
-
-struct ht_lock {
- kmutex_t ht_lock;
-#ifdef _KERNEL
- unsigned char pad[HT_LOCK_PAD];
-#endif
-};
-
-#define BUF_LOCKS 8192
+#define BUF_LOCKS 2048
typedef struct buf_hash_table {
uint64_t ht_mask;
arc_buf_hdr_t **ht_table;
- struct ht_lock ht_locks[BUF_LOCKS];
+ kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
} buf_hash_table_t;
static buf_hash_table_t buf_hash_table;
#define BUF_HASH_INDEX(spa, dva, birth) \
(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
-#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
-#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
+#define BUF_HASH_LOCK(idx) (&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
#define HDR_LOCK(hdr) \
(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
@@ -1111,7 +1100,7 @@ buf_fini(void)
(buf_hash_table.ht_mask + 1) * sizeof (void *));
#endif
for (i = 0; i < BUF_LOCKS; i++)
- mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
+ mutex_destroy(BUF_HASH_LOCK(i));
kmem_cache_destroy(hdr_full_cache);
kmem_cache_destroy(hdr_full_crypt_cache);
kmem_cache_destroy(hdr_l2only_cache);
@@ -1276,10 +1265,8 @@ retry:
for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
- for (i = 0; i < BUF_LOCKS; i++) {
- mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
- NULL, MUTEX_DEFAULT, NULL);
- }
+ for (i = 0; i < BUF_LOCKS; i++)
+ mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
}
#define ARC_MINTIME (hz>>4) /* 62 ms */
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 9ce091b80dcb..289247c6ed65 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -826,12 +826,12 @@ dbuf_init(void)
int i;
/*
- * The hash table is big enough to fill all of physical memory
+ * The hash table is big enough to fill one eighth of physical memory
* with an average block size of zfs_arc_average_blocksize (default 8K).
* By default, the table will take up
* totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers).
*/
- while (hsize * zfs_arc_average_blocksize < physmem * PAGESIZE)
+ while (hsize * zfs_arc_average_blocksize < arc_all_memory() / 8)
hsize <<= 1;
retry:
@@ -3055,8 +3055,8 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_state = DB_EVICTING; /* not worth logging this state change */
if ((odb = dbuf_hash_insert(db)) != NULL) {
/* someone else inserted it first */
- kmem_cache_free(dbuf_kmem_cache, db);
mutex_exit(&dn->dn_dbufs_mtx);
+ kmem_cache_free(dbuf_kmem_cache, db);
DBUF_STAT_BUMP(hash_insert_race);
return (odb);
}