Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r--  module/zfs/arc.c | 307
1 file changed, 229 insertions(+), 78 deletions(-)
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 3ba198380733..e05b11d51942 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -492,6 +492,8 @@ arc_stats_t arc_stats = {
{ "evict_not_enough", KSTAT_DATA_UINT64 },
{ "evict_l2_cached", KSTAT_DATA_UINT64 },
{ "evict_l2_eligible", KSTAT_DATA_UINT64 },
+ { "evict_l2_eligible_mfu", KSTAT_DATA_UINT64 },
+ { "evict_l2_eligible_mru", KSTAT_DATA_UINT64 },
{ "evict_l2_ineligible", KSTAT_DATA_UINT64 },
{ "evict_l2_skip", KSTAT_DATA_UINT64 },
{ "hash_elements", KSTAT_DATA_UINT64 },
@@ -533,6 +535,11 @@ arc_stats_t arc_stats = {
{ "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
{ "l2_hits", KSTAT_DATA_UINT64 },
{ "l2_misses", KSTAT_DATA_UINT64 },
+ { "l2_prefetch_asize", KSTAT_DATA_UINT64 },
+ { "l2_mru_asize", KSTAT_DATA_UINT64 },
+ { "l2_mfu_asize", KSTAT_DATA_UINT64 },
+ { "l2_bufc_data_asize", KSTAT_DATA_UINT64 },
+ { "l2_bufc_metadata_asize", KSTAT_DATA_UINT64 },
{ "l2_feeds", KSTAT_DATA_UINT64 },
{ "l2_rw_clash", KSTAT_DATA_UINT64 },
{ "l2_read_bytes", KSTAT_DATA_UINT64 },
@@ -894,6 +901,17 @@ static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
static void l2arc_read_done(zio_t *);
static void l2arc_do_free_on_write(void);
+static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
+ boolean_t state_only);
+
+#define l2arc_hdr_arcstats_increment(hdr) \
+ l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
+#define l2arc_hdr_arcstats_decrement(hdr) \
+ l2arc_hdr_arcstats_update((hdr), B_FALSE, B_FALSE)
+#define l2arc_hdr_arcstats_increment_state(hdr) \
+ l2arc_hdr_arcstats_update((hdr), B_TRUE, B_TRUE)
+#define l2arc_hdr_arcstats_decrement_state(hdr) \
+ l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
/*
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
@@ -951,7 +969,7 @@ static void l2arc_log_blk_fetch_abort(zio_t *zio);
/* L2ARC persistence block restoration routines. */
static void l2arc_log_blk_restore(l2arc_dev_t *dev,
- const l2arc_log_blk_phys_t *lb, uint64_t lb_asize, uint64_t lb_daddr);
+ const l2arc_log_blk_phys_t *lb, uint64_t lb_asize);
static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
l2arc_dev_t *dev);
@@ -1727,7 +1745,7 @@ static arc_buf_hdr_t *
arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth,
enum zio_compress compress, uint8_t complevel, boolean_t protected,
- boolean_t prefetch)
+ boolean_t prefetch, arc_state_type_t arcs_state)
{
arc_buf_hdr_t *hdr;
@@ -1751,6 +1769,7 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
hdr->b_l2hdr.b_dev = dev;
hdr->b_l2hdr.b_daddr = daddr;
+ hdr->b_l2hdr.b_arcs_state = arcs_state;
return (hdr);
}
@@ -2312,7 +2331,11 @@ add_reference(arc_buf_hdr_t *hdr, void *tag)
arc_evictable_space_decrement(hdr, state);
}
/* remove the prefetch flag if we get a reference */
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_decrement_state(hdr);
arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_increment_state(hdr);
}
}
@@ -2595,9 +2618,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
}
}
- if (HDR_HAS_L1HDR(hdr))
+ if (HDR_HAS_L1HDR(hdr)) {
hdr->b_l1hdr.b_state = new_state;
+ if (HDR_HAS_L2HDR(hdr) && new_state != arc_l2c_only) {
+ l2arc_hdr_arcstats_decrement_state(hdr);
+ hdr->b_l2hdr.b_arcs_state = new_state->arcs_state;
+ l2arc_hdr_arcstats_increment_state(hdr);
+ }
+ }
+
/*
* L2 headers should never be on the L2 state list since they don't
* have L1 headers allocated.
@@ -3685,6 +3715,76 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
}
static void
+l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
+ boolean_t state_only)
+{
+ l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
+ l2arc_dev_t *dev = l2hdr->b_dev;
+ uint64_t lsize = HDR_GET_LSIZE(hdr);
+ uint64_t psize = HDR_GET_PSIZE(hdr);
+ uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
+ arc_buf_contents_t type = hdr->b_type;
+ int64_t lsize_s;
+ int64_t psize_s;
+ int64_t asize_s;
+
+ if (incr) {
+ lsize_s = lsize;
+ psize_s = psize;
+ asize_s = asize;
+ } else {
+ lsize_s = -lsize;
+ psize_s = -psize;
+ asize_s = -asize;
+ }
+
+ /* If the buffer is a prefetch, count it as such. */
+ if (HDR_PREFETCH(hdr)) {
+ ARCSTAT_INCR(arcstat_l2_prefetch_asize, asize_s);
+ } else {
+ /*
+ * We use the value stored in the L2 header upon initial
+ * caching in L2ARC. This value will be updated in case
+ * an MRU/MRU_ghost buffer transitions to MFU but the L2ARC
+ * metadata (log entry) cannot currently be updated. Having
+ * the ARC state in the L2 header solves the problem of a
+ * possibly absent L1 header (apparent in buffers restored
+ * from persistent L2ARC).
+ */
+ switch (hdr->b_l2hdr.b_arcs_state) {
+ case ARC_STATE_MRU_GHOST:
+ case ARC_STATE_MRU:
+ ARCSTAT_INCR(arcstat_l2_mru_asize, asize_s);
+ break;
+ case ARC_STATE_MFU_GHOST:
+ case ARC_STATE_MFU:
+ ARCSTAT_INCR(arcstat_l2_mfu_asize, asize_s);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (state_only)
+ return;
+
+ ARCSTAT_INCR(arcstat_l2_psize, psize_s);
+ ARCSTAT_INCR(arcstat_l2_lsize, lsize_s);
+
+ switch (type) {
+ case ARC_BUFC_DATA:
+ ARCSTAT_INCR(arcstat_l2_bufc_data_asize, asize_s);
+ break;
+ case ARC_BUFC_METADATA:
+ ARCSTAT_INCR(arcstat_l2_bufc_metadata_asize, asize_s);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
{
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
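All of the new gauges are maintained in allocated size (asize) rather than physical size, because the space a buffer actually occupies on the cache device is its psize rounded up to the vdev's allocation unit by vdev_psize_to_asize(). A hedged illustration of that rounding for an ashift=12 device (the P2ROUNDUP bit trick below mirrors the generic ZFS macro; real vdevs may apply further adjustments):

#include <stdio.h>
#include <stdint.h>

/* same form as ZFS's P2ROUNDUP(); align must be a power of two */
#define P2ROUNDUP(x, align)	(-(-(uint64_t)(x) & -(uint64_t)(align)))

int
main(void)
{
	uint64_t ashift = 12;	/* 4 KiB allocation unit */
	uint64_t psize = 6656;	/* 6.5 KiB after compression */
	uint64_t asize = P2ROUNDUP(psize, 1ULL << ashift);

	printf("psize %llu -> asize %llu\n",	/* prints 6656 -> 8192 */
	    (unsigned long long)psize, (unsigned long long)asize);
	return (0);
}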
@@ -3697,9 +3797,7 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
list_remove(&dev->l2ad_buflist, hdr);
- ARCSTAT_INCR(arcstat_l2_psize, -psize);
- ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
-
+ l2arc_hdr_arcstats_decrement(hdr);
vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
(void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr),
@@ -3903,6 +4001,21 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
if (l2arc_write_eligible(hdr->b_spa, hdr)) {
ARCSTAT_INCR(arcstat_evict_l2_eligible,
HDR_GET_LSIZE(hdr));
+
+ switch (state->arcs_state) {
+ case ARC_STATE_MRU:
+ ARCSTAT_INCR(
+ arcstat_evict_l2_eligible_mru,
+ HDR_GET_LSIZE(hdr));
+ break;
+ case ARC_STATE_MFU:
+ ARCSTAT_INCR(
+ arcstat_evict_l2_eligible_mfu,
+ HDR_GET_LSIZE(hdr));
+ break;
+ default:
+ break;
+ }
} else {
ARCSTAT_INCR(arcstat_evict_l2_ineligible,
HDR_GET_LSIZE(hdr));
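With the eligibility counter split by state, the MFU share of L2ARC-eligible evictions suggests how much of the eligible stream the l2arc_mfuonly tunable would still capture. A toy calculation with made-up cumulative readings of the two new kstats:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* hypothetical readings from arcstats */
	uint64_t evict_l2_eligible_mru = 3ULL << 30;	/* 3 GiB */
	uint64_t evict_l2_eligible_mfu = 9ULL << 30;	/* 9 GiB */

	double mfu_share = (double)evict_l2_eligible_mfu /
	    (double)(evict_l2_eligible_mru + evict_l2_eligible_mfu);

	printf("MFU share of L2-eligible evictions: %.0f%%\n",
	    mfu_share * 100.0);	/* prints 75% */
	return (0);
}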
@@ -4769,14 +4882,7 @@ arc_kmem_reap_soon(void)
static boolean_t
arc_evict_cb_check(void *arg, zthr_t *zthr)
{
- /*
- * This is necessary so that any changes which may have been made to
- * many of the zfs_arc_* module parameters will be propagated to
- * their actual internal variable counterparts. Without this,
- * changing those module params at runtime would have no effect.
- */
- arc_tuning_update(B_FALSE);
-
+#ifdef ZFS_DEBUG
/*
* This is necessary in order to keep the kstat information
* up to date for tools that display kstat data such as the
@@ -4784,15 +4890,15 @@ arc_evict_cb_check(void *arg, zthr_t *zthr)
* typically do not call kstat's update function, but simply
* dump out stats from the most recent update. Without
* this call, these commands may show stale stats for the
- * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
- * with this change, the data might be up to 1 second
- * out of date(the arc_evict_zthr has a maximum sleep
- * time of 1 second); but that should suffice. The
- * arc_state_t structures can be queried directly if more
- * accurate information is needed.
+ * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
+ * with this call, the data might be out of date if the
+ * evict thread hasn't been woken recently; but that should
+ * suffice. The arc_state_t structures can be queried
+ * directly if more accurate information is needed.
*/
if (arc_ksp != NULL)
arc_ksp->ks_update(arc_ksp, KSTAT_READ);
+#endif
/*
* We have to rely on arc_wait_for_eviction() to tell us when to
@@ -5345,11 +5451,15 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
ASSERT(multilist_link_active(
&hdr->b_l1hdr.b_arc_node));
} else {
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_decrement_state(hdr);
arc_hdr_clear_flags(hdr,
ARC_FLAG_PREFETCH |
ARC_FLAG_PRESCIENT_PREFETCH);
atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_increment_state(hdr);
}
hdr->b_l1hdr.b_arc_access = now;
return;
@@ -5380,13 +5490,16 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* was evicted from the cache. Move it to the
* MFU state.
*/
-
if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
new_state = arc_mru;
if (zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) {
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_decrement_state(hdr);
arc_hdr_clear_flags(hdr,
ARC_FLAG_PREFETCH |
ARC_FLAG_PRESCIENT_PREFETCH);
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_increment_state(hdr);
}
DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
} else {
@@ -5639,7 +5752,7 @@ arc_read_done(zio_t *zio)
*/
int callback_cnt = 0;
for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
- if (!acb->acb_done)
+ if (!acb->acb_done || acb->acb_nobuf)
continue;
callback_cnt++;
@@ -5804,6 +5917,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
boolean_t noauth_read = BP_IS_AUTHENTICATED(bp) &&
(zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
boolean_t embedded_bp = !!BP_IS_EMBEDDED(bp);
+ boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
int rc = 0;
ASSERT(!embedded_bp ||
@@ -5888,6 +6002,7 @@ top:
acb->acb_compressed = compressed_read;
acb->acb_encrypted = encrypted_read;
acb->acb_noauth = noauth_read;
+ acb->acb_nobuf = no_buf;
acb->acb_zb = *zb;
if (pio != NULL)
acb->acb_zio_dummy = zio_null(pio,
@@ -5897,8 +6012,6 @@ top:
acb->acb_zio_head = head_zio;
acb->acb_next = hdr->b_l1hdr.b_acb;
hdr->b_l1hdr.b_acb = acb;
- mutex_exit(hash_lock);
- goto out;
}
mutex_exit(hash_lock);
goto out;
@@ -5907,7 +6020,7 @@ top:
ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
hdr->b_l1hdr.b_state == arc_mfu);
- if (done) {
+ if (done && !no_buf) {
if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
/*
* This is a demand read which does not have to
@@ -5961,8 +6074,12 @@ top:
ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
rc != EACCES);
} else if (*arc_flags & ARC_FLAG_PREFETCH &&
- zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
+ zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_decrement_state(hdr);
arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_increment_state(hdr);
}
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
arc_access(hdr, hash_lock);
@@ -6106,8 +6223,13 @@ top:
}
if (*arc_flags & ARC_FLAG_PREFETCH &&
- zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt))
+ zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_decrement_state(hdr);
arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+ if (HDR_HAS_L2HDR(hdr))
+ l2arc_hdr_arcstats_increment_state(hdr);
+ }
if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
if (*arc_flags & ARC_FLAG_L2CACHE)
@@ -6176,7 +6298,11 @@ top:
metadata, misses);
}
- if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
+ /* Check if the spa even has l2 configured */
+ const boolean_t spa_has_l2 = l2arc_ndev != 0 &&
+ spa->spa_l2cache.sav_count > 0;
+
+ if (vd != NULL && spa_has_l2 && !(l2arc_norw && devw)) {
/*
* Read from the L2ARC if the following are true:
* 1. The L2ARC vdev was previously cached.
@@ -6184,7 +6310,7 @@ top:
* 3. This buffer isn't currently writing to the L2ARC.
* 4. The L2ARC entry wasn't evicted, which may
* also have invalidated the vdev.
- * 5. This isn't prefetch and l2arc_noprefetch is set.
+ * 5. This isn't a prefetch, or l2arc_noprefetch is 0.
*/
if (HDR_HAS_L2HDR(hdr) &&
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
@@ -6277,15 +6403,24 @@ top:
} else {
if (vd != NULL)
spa_config_exit(spa, SCL_L2ARC, vd);
+
/*
- * Skip ARC stat bump for block pointers with
- * embedded data. The data are read from the blkptr
- * itself via decode_embedded_bp_compressed().
+ * Only a spa with l2 should contribute to l2
+ * miss stats. (Including the case of having a
+ * faulted cache device - that's also a miss.)
*/
- if (l2arc_ndev != 0 && !embedded_bp) {
- DTRACE_PROBE1(l2arc__miss,
- arc_buf_hdr_t *, hdr);
- ARCSTAT_BUMP(arcstat_l2_misses);
+ if (spa_has_l2) {
+ /*
+ * Skip ARC stat bump for block pointers with
+ * embedded data. The data are read from the
+ * blkptr itself via
+ * decode_embedded_bp_compressed().
+ */
+ if (!embedded_bp) {
+ DTRACE_PROBE1(l2arc__miss,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(arcstat_l2_misses);
+ }
}
}
@@ -7070,9 +7205,9 @@ arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg)
*/
uint64_t total_dirty = reserve + arc_tempreserve + anon_size;
uint64_t spa_dirty_anon = spa_dirty_data(spa);
-
- if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 &&
- anon_size > arc_c * zfs_arc_anon_limit_percent / 100 &&
+ uint64_t rarc_c = arc_warm ? arc_c : arc_c_max;
+ if (total_dirty > rarc_c * zfs_arc_dirty_limit_percent / 100 &&
+ anon_size > rarc_c * zfs_arc_anon_limit_percent / 100 &&
spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) {
#ifdef ZFS_DEBUG
uint64_t meta_esize = zfs_refcount_count(
@@ -7080,9 +7215,9 @@ arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg)
uint64_t data_esize =
zfs_refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
- "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n",
+ "anon_data=%lluK tempreserve=%lluK rarc_c=%lluK\n",
arc_tempreserve >> 10, meta_esize >> 10,
- data_esize >> 10, reserve >> 10, arc_c >> 10);
+ data_esize >> 10, reserve >> 10, rarc_c >> 10);
#endif
DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle);
return (SET_ERROR(ERESTART));
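Right after pool import arc_c can still sit near arc_c_min, so throttling dirty data against it would trigger spurious ERESTARTs before the ARC has grown; rarc_c substitutes arc_c_max until arc_warm is set. A toy run of the new threshold, assuming the default zfs_arc_dirty_limit_percent of 50:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

int
main(void)
{
	bool arc_warm = false;			/* freshly imported pool */
	uint64_t arc_c = 1ULL << 30;		/* 1 GiB: not yet grown */
	uint64_t arc_c_max = 16ULL << 30;	/* 16 GiB */
	uint64_t dirty_limit_pct = 50;		/* assumed default */

	uint64_t rarc_c = arc_warm ? arc_c : arc_c_max;
	printf("dirty throttle point: %llu MiB (vs. %llu MiB using arc_c)\n",
	    (unsigned long long)((rarc_c * dirty_limit_pct / 100) >> 20),
	    (unsigned long long)((arc_c * dirty_limit_pct / 100) >> 20));
	return (0);
}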
@@ -7450,6 +7585,15 @@ arc_target_bytes(void)
}
void
+arc_set_limits(uint64_t allmem)
+{
+ /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
+ arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
+
+ /* How to set default max varies by platform. */
+ arc_c_max = arc_default_max(arc_c_min, allmem);
+}
+
+void
arc_init(void)
{
uint64_t percent, allmem = arc_all_memory();
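arc_set_limits() simply factors the existing min/max computation out of arc_init() so the new hotplug path (arc_register_hotplug() below) can reapply it when memory is added. For reference, SPA_MAXBLOCKSHIFT is 24 in OpenZFS, so the floor of 2ULL << SPA_MAXBLOCKSHIFT comes to 32 MiB; a quick standalone check of both regimes:

#include <stdio.h>
#include <stdint.h>

#define SPA_MAXBLOCKSHIFT	24	/* 16 MiB max block size */
#define MAX(a, b)		((a) > (b) ? (a) : (b))

int
main(void)
{
	uint64_t mems[] = { 512ULL << 20, 16ULL << 30 };

	for (int i = 0; i < 2; i++) {
		uint64_t arc_c_min = MAX(mems[i] / 32,
		    2ULL << SPA_MAXBLOCKSHIFT);
		printf("allmem %6llu MiB -> arc_c_min %4llu MiB\n",
		    (unsigned long long)(mems[i] >> 20),
		    (unsigned long long)(arc_c_min >> 20));
	}
	return (0);	/* prints 32 MiB and 512 MiB respectively */
}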
@@ -7464,11 +7608,7 @@ arc_init(void)
arc_lowmem_init();
#endif
- /* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
- arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
-
- /* How to set default max varies by platform. */
- arc_c_max = arc_default_max(arc_c_min, allmem);
+ arc_set_limits(allmem);
#ifndef _KERNEL
/*
@@ -7505,6 +7645,8 @@ arc_init(void)
if (arc_c < arc_c_min)
arc_c = arc_c_min;
+ arc_register_hotplug();
+
arc_state_init();
buf_init();
@@ -7513,8 +7655,9 @@ arc_init(void)
offsetof(arc_prune_t, p_node));
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
- arc_prune_taskq = taskq_create("arc_prune", boot_ncpus, defclsyspri,
- boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+ arc_prune_taskq = taskq_create("arc_prune", 100, defclsyspri,
+ boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
+ TASKQ_THREADS_CPU_PCT);
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
@@ -7525,8 +7668,8 @@ arc_init(void)
kstat_install(arc_ksp);
}
- arc_evict_zthr = zthr_create_timer("arc_evict",
- arc_evict_cb_check, arc_evict_cb, NULL, SEC2NSEC(1));
+ arc_evict_zthr = zthr_create("arc_evict",
+ arc_evict_cb_check, arc_evict_cb, NULL);
arc_reap_zthr = zthr_create_timer("arc_reap",
arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1));
@@ -7611,6 +7754,8 @@ arc_fini(void)
buf_fini();
arc_state_fini();
+ arc_unregister_hotplug();
+
/*
* We destroy the zthrs after all the ARC state has been
* torn down to avoid the case of them receiving any
@@ -8066,9 +8211,6 @@ l2arc_write_done(zio_t *zio)
DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
l2arc_write_callback_t *, cb);
- if (zio->io_error != 0)
- ARCSTAT_BUMP(arcstat_l2_writes_error);
-
/*
* All writes completed, or an error was hit.
*/
@@ -8132,8 +8274,7 @@ top:
arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
uint64_t psize = HDR_GET_PSIZE(hdr);
- ARCSTAT_INCR(arcstat_l2_psize, -psize);
- ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
+ l2arc_hdr_arcstats_decrement(hdr);
bytes_dropped +=
vdev_psize_to_asize(dev->l2ad_vdev, psize);
@@ -8181,6 +8322,8 @@ top:
list_destroy(&cb->l2wcb_abd_list);
if (zio->io_error != 0) {
+ ARCSTAT_BUMP(arcstat_l2_writes_error);
+
/*
* Restore the lbps array in the header to its previous state.
* If the list of log block pointers is empty, zero out the
@@ -8746,9 +8889,16 @@ out:
goto top;
}
- ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
- if (!dev->l2ad_first)
- ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
+ if (!all) {
+ /*
+ * In case of cache device removal (all) the following
+ * assertions may be violated without functional consequences
+ * as the device is about to be removed.
+ */
+ ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
+ if (!dev->l2ad_first)
+ ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
+ }
}
/*
@@ -9087,6 +9237,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
hdr->b_l2hdr.b_hits = 0;
hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
+ hdr->b_l2hdr.b_arcs_state =
+ hdr->b_l1hdr.b_state->arcs_state;
arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR);
mutex_enter(&dev->l2ad_mtx);
@@ -9109,6 +9261,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
write_psize += psize;
write_asize += asize;
dev->l2ad_hand += asize;
+ l2arc_hdr_arcstats_increment(hdr);
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
mutex_exit(hash_lock);
@@ -9151,8 +9304,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
ASSERT3U(write_asize, <=, target_sz);
ARCSTAT_BUMP(arcstat_l2_writes_sent);
ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
- ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
- ARCSTAT_INCR(arcstat_l2_psize, write_psize);
dev->l2ad_writing = B_TRUE;
(void) zio_wait(pio);
@@ -9377,8 +9528,6 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
l2arc_dev_hdr_phys_t *l2dhdr;
uint64_t l2dhdr_asize;
spa_t *spa;
- int err;
- boolean_t l2dhdr_valid = B_TRUE;
dev = l2arc_vdev_get(vd);
ASSERT3P(dev, !=, NULL);
@@ -9407,10 +9556,7 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
/*
* Read the device header, if an error is returned do not rebuild L2ARC.
*/
- if ((err = l2arc_dev_hdr_read(dev)) != 0)
- l2dhdr_valid = B_FALSE;
-
- if (l2dhdr_valid && dev->l2ad_log_entries > 0) {
+ if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
/*
* If we are onlining a cache device (vdev_reopen) that was
* still present (l2arc_vdev_present()) and rebuild is enabled,
@@ -9710,7 +9856,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
* L2BLK_GET_PSIZE returns aligned size for log blocks.
*/
uint64_t asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
- l2arc_log_blk_restore(dev, this_lb, asize, lbps[0].lbp_daddr);
+ l2arc_log_blk_restore(dev, this_lb, asize);
/*
* log block restored, include its pointer in the list of
@@ -9757,6 +9903,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
!dev->l2ad_first)
goto out;
+ cond_resched();
for (;;) {
mutex_enter(&l2arc_rebuild_thr_lock);
if (dev->l2ad_rebuild_cancel) {
@@ -9790,7 +9937,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
PTR_SWAP(this_lb, next_lb);
this_io = next_io;
next_io = NULL;
- }
+ }
if (this_io != NULL)
l2arc_log_blk_fetch_abort(this_io);
@@ -9857,7 +10004,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
- ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+ ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_SPECULATIVE, B_FALSE));
@@ -10028,7 +10175,7 @@ cleanup:
*/
static void
l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb,
- uint64_t lb_asize, uint64_t lb_daddr)
+ uint64_t lb_asize)
{
uint64_t size = 0, asize = 0;
uint64_t log_entries = dev->l2ad_log_entries;
@@ -10102,19 +10249,18 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
L2BLK_GET_PSIZE((le)->le_prop), le->le_birth,
L2BLK_GET_COMPRESS((le)->le_prop), le->le_complevel,
L2BLK_GET_PROTECTED((le)->le_prop),
- L2BLK_GET_PREFETCH((le)->le_prop));
+ L2BLK_GET_PREFETCH((le)->le_prop),
+ L2BLK_GET_STATE((le)->le_prop));
asize = vdev_psize_to_asize(dev->l2ad_vdev,
L2BLK_GET_PSIZE((le)->le_prop));
/*
* vdev_space_update() has to be called before arc_hdr_destroy() to
- * avoid underflow since the latter also calls the former.
+ * avoid underflow since the latter also calls vdev_space_update().
*/
+ l2arc_hdr_arcstats_increment(hdr);
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
- ARCSTAT_INCR(arcstat_l2_lsize, HDR_GET_LSIZE(hdr));
- ARCSTAT_INCR(arcstat_l2_psize, HDR_GET_PSIZE(hdr));
-
mutex_enter(&dev->l2ad_mtx);
list_insert_tail(&dev->l2ad_buflist, hdr);
(void) zfs_refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr);
@@ -10134,14 +10280,15 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
arc_hdr_set_flags(exists, ARC_FLAG_HAS_L2HDR);
exists->b_l2hdr.b_dev = dev;
exists->b_l2hdr.b_daddr = le->le_daddr;
+ exists->b_l2hdr.b_arcs_state =
+ L2BLK_GET_STATE((le)->le_prop);
mutex_enter(&dev->l2ad_mtx);
list_insert_tail(&dev->l2ad_buflist, exists);
(void) zfs_refcount_add_many(&dev->l2ad_alloc,
arc_hdr_size(exists), exists);
mutex_exit(&dev->l2ad_mtx);
+ l2arc_hdr_arcstats_increment(exists);
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
- ARCSTAT_INCR(arcstat_l2_lsize, HDR_GET_LSIZE(exists));
- ARCSTAT_INCR(arcstat_l2_psize, HDR_GET_PSIZE(exists));
}
ARCSTAT_BUMP(arcstat_l2_rebuild_bufs_precached);
}
@@ -10437,6 +10584,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
L2BLK_SET_TYPE((le)->le_prop, hdr->b_type);
L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr)));
L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr)));
+ L2BLK_SET_STATE((le)->le_prop, hdr->b_l1hdr.b_state->arcs_state);
dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev,
HDR_GET_PSIZE(hdr));
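L2BLK_SET_STATE() packs the header's current ARC state into the log entry's le_prop word alongside the existing psize/compress/type/protected/prefetch fields; this is what lets l2arc_hdr_restore() above recover b_arcs_state for buffers rebuilt from persistent L2ARC. A sketch of the generic encode/decode pattern behind the L2BLK_* accessors (the 4-bit field at offset 60 below is made up; the real layout is defined with the other L2BLK_* macros):

#include <stdio.h>
#include <stdint.h>

#define BF64_MASK(len)			((1ULL << (len)) - 1)
#define BF64_ENCODE(v, low, len)	(((uint64_t)(v) & BF64_MASK(len)) << (low))
#define BF64_DECODE(x, low, len)	(((x) >> (low)) & BF64_MASK(len))

int
main(void)
{
	uint64_t le_prop = 0;
	unsigned state = 3;	/* e.g. the ARC_STATE_MFU enumerator */

	le_prop |= BF64_ENCODE(state, 60, 4);	/* hypothetical field */
	printf("decoded state: %llu\n",
	    (unsigned long long)BF64_DECODE(le_prop, 60, 4));
	return (0);
}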
@@ -10605,5 +10753,8 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW,
"Percentage of excess dnodes to try to unpin");
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, INT, ZMOD_RW,
- "When full, ARC allocation waits for eviction of this % of alloc size");
+ "When full, ARC allocation waits for eviction of this % of alloc size");
+
+ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, INT, ZMOD_RW,
+ "The number of headers to evict per sublist before moving to the next");
/* END CSTYLED */