Diffstat (limited to 'sys/contrib/openzfs/module/zfs/spa.c')
-rw-r--r-- | sys/contrib/openzfs/module/zfs/spa.c | 100
1 file changed, 63 insertions, 37 deletions
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index c3800e018c73..638572996c3a 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -34,6 +34,7 @@
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
+ * Copyright (c) 2024, Klara Inc.
  */
 
 /*
@@ -170,14 +171,19 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
  * that scales with the number of CPUs.
  *
  * The different taskq priorities are to handle the different contexts (issue
- * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
- * need to be handled with minimum delay.
+ * and interrupt) and then to reserve threads for high priority I/Os that
+ * need to be handled with minimum delay. Illumos taskq has unfair TQ_FRONT
+ * implementation, so separate high priority threads are used there.
  */
 static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
 	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* NULL */
 	{ ZTI_N(8),	ZTI_NULL,	ZTI_SCALE,	ZTI_NULL }, /* READ */
+#ifdef illumos
 	{ ZTI_SYNC,	ZTI_N(5),	ZTI_SCALE,	ZTI_N(5) }, /* WRITE */
+#else
+	{ ZTI_SYNC,	ZTI_NULL,	ZTI_SCALE,	ZTI_NULL }, /* WRITE */
+#endif
 	{ ZTI_SCALE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FREE */
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* CLAIM */
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL }, /* FLUSH */
@@ -1217,7 +1223,7 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
  *
  * Example (the defaults for READ and WRITE)
  *	zio_taskq_read='fixed,1,8 null scale null'
- *	zio_taskq_write='sync fixed,1,5 scale fixed,1,5'
+ *	zio_taskq_write='sync null scale null'
  *
  * Each sets the entire row at a time.
  *
@@ -1484,9 +1490,9 @@ spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS)
  * Note that a type may have multiple discrete taskqs to avoid lock contention
  * on the taskq itself.
  */
-static taskq_t *
-spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
-    zio_t *zio)
+void
+spa_taskq_dispatch(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+    task_func_t *func, zio_t *zio, boolean_t cutinline)
 {
 	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
 	taskq_t *tq;
@@ -1494,37 +1500,25 @@ spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
 	ASSERT3P(tqs->stqs_taskq, !=, NULL);
 	ASSERT3U(tqs->stqs_count, !=, 0);
 
+	/*
+	 * NB: We are assuming that the zio can only be dispatched
+	 * to a single taskq at a time.  It would be a grievous error
+	 * to dispatch the zio to another taskq at the same time.
+	 */
+	ASSERT(zio);
+	ASSERT(taskq_empty_ent(&zio->io_tqent));
+
 	if (tqs->stqs_count == 1) {
 		tq = tqs->stqs_taskq[0];
 	} else if ((t == ZIO_TYPE_WRITE) && (q == ZIO_TASKQ_ISSUE) &&
-	    (zio != NULL) && ZIO_HAS_ALLOCATOR(zio)) {
+	    ZIO_HAS_ALLOCATOR(zio)) {
 		tq = tqs->stqs_taskq[zio->io_allocator % tqs->stqs_count];
 	} else {
 		tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
 	}
 
-	return (tq);
-}
-
-void
-spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
-    task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent,
-    zio_t *zio)
-{
-	taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, zio);
-	taskq_dispatch_ent(tq, func, arg, flags, ent);
-}
-/*
- * Same as spa_taskq_dispatch_ent() but block on the task until completion.
- */
-void
-spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
-    task_func_t *func, void *arg, uint_t flags)
-{
-	taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, NULL);
-	taskqid_t id = taskq_dispatch(tq, func, arg, flags);
-	if (id)
-		taskq_wait_id(tq, id);
+	taskq_dispatch_ent(tq, func, zio, cutinline ? TQ_FRONT : 0,
+	    &zio->io_tqent);
 }
 
 static void
@@ -1986,7 +1980,8 @@ spa_destroy_aux_threads(spa_t *spa)
 static void
 spa_unload(spa_t *spa)
 {
-	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+	ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
+	    spa->spa_export_thread == curthread);
 	ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED);
 
 	spa_import_progress_remove(spa_guid(spa));
@@ -6950,7 +6945,7 @@ static int
 spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
     boolean_t force, boolean_t hardforce)
 {
-	int error;
+	int error = 0;
 	spa_t *spa;
 	hrtime_t export_start = gethrtime();
 
@@ -6974,8 +6969,8 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
 	spa->spa_is_exporting = B_TRUE;
 
 	/*
-	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
-	 * reacquire the namespace lock, and see if we can export.
+	 * Put a hold on the pool, drop the namespace lock, stop async tasks
+	 * and see if we can export.
 	 */
 	spa_open_ref(spa, FTAG);
 	mutex_exit(&spa_namespace_lock);
@@ -6985,10 +6980,14 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
 		taskq_wait(spa->spa_zvol_taskq);
 	}
 	mutex_enter(&spa_namespace_lock);
+	spa->spa_export_thread = curthread;
 	spa_close(spa, FTAG);
 
-	if (spa->spa_state == POOL_STATE_UNINITIALIZED)
+	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
+		mutex_exit(&spa_namespace_lock);
 		goto export_spa;
+	}
+
 	/*
 	 * The pool will be in core if it's openable, in which case we can
 	 * modify its state.  Objsets may be open only because they're dirty,
@@ -7009,6 +7008,14 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
 		goto fail;
 	}
 
+	mutex_exit(&spa_namespace_lock);
+	/*
+	 * At this point we no longer hold the spa_namespace_lock and
+	 * there were no references on the spa. Future spa_lookups will
+	 * notice the spa->spa_export_thread and wait until we signal
+	 * that we are finshed.
+	 */
+
 	if (spa->spa_sync_on) {
 		vdev_t *rvd = spa->spa_root_vdev;
 		/*
@@ -7020,6 +7027,7 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
 		if (!force && new_state == POOL_STATE_EXPORTED &&
 		    spa_has_active_shared_spare(spa)) {
 			error = SET_ERROR(EXDEV);
+			mutex_enter(&spa_namespace_lock);
 			goto fail;
 		}
 
@@ -7084,6 +7092,13 @@ export_spa:
 	if (oldconfig && spa->spa_config)
 		*oldconfig = fnvlist_dup(spa->spa_config);
 
+	if (new_state == POOL_STATE_EXPORTED)
+		zio_handle_export_delay(spa, gethrtime() - export_start);
+
+	/*
+	 * Take the namespace lock for the actual spa_t removal
+	 */
+	mutex_enter(&spa_namespace_lock);
 	if (new_state != POOL_STATE_UNINITIALIZED) {
 		if (!hardforce)
 			spa_write_cachefile(spa, B_TRUE, B_TRUE, B_FALSE);
@@ -7095,17 +7110,25 @@ export_spa:
 		 * we make sure to reset the exporting flag.
 		 */
 		spa->spa_is_exporting = B_FALSE;
+		spa->spa_export_thread = NULL;
 	}
 
-	if (new_state == POOL_STATE_EXPORTED)
-		zio_handle_export_delay(spa, gethrtime() - export_start);
-
+	/*
+	 * Wake up any waiters in spa_lookup()
+	 */
+	cv_broadcast(&spa_namespace_cv);
 	mutex_exit(&spa_namespace_lock);
 	return (0);
 
 fail:
 	spa->spa_is_exporting = B_FALSE;
+	spa->spa_export_thread = NULL;
+	spa_async_resume(spa);
+	/*
+	 * Wake up any waiters in spa_lookup()
+	 */
+	cv_broadcast(&spa_namespace_cv);
 	mutex_exit(&spa_namespace_lock);
 	return (error);
 }
@@ -10145,6 +10168,9 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 	metaslab_class_evict_old(spa->spa_normal_class, txg);
 	metaslab_class_evict_old(spa->spa_log_class, txg);
+	/* spa_embedded_log_class has only one metaslab per vdev. */
+	metaslab_class_evict_old(spa->spa_special_class, txg);
+	metaslab_class_evict_old(spa->spa_dedup_class, txg);
 
 	spa_sync_close_syncing_log_sm(spa);
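
Editor's note: the consolidated spa_taskq_dispatch() above replaces the old spa_taskq_dispatch_ent()/spa_taskq_dispatch_sync() pair. The zio now carries its own embedded taskq_ent_t (io_tqent), and the cutinline flag maps to TQ_FRONT inside the function. A minimal caller sketch follows, assuming the usual zio_execute() pipeline entry point; the helper name is illustrative and is not the actual zio.c call site.

/*
 * Illustrative use of the consolidated spa_taskq_dispatch() API; a sketch,
 * not the literal zio.c change.  The zio's embedded io_tqent is used for
 * the dispatch, so no separate arg/flags/ent arguments are passed, and
 * cutinline == B_TRUE becomes TQ_FRONT inside spa_taskq_dispatch().
 */
static void
example_zio_issue(zio_t *zio, boolean_t cutinline)
{
	spa_t *spa = zio->io_spa;

	/* zio_execute() runs the remaining pipeline stages for this zio. */
	spa_taskq_dispatch(spa, zio->io_type, ZIO_TASKQ_ISSUE,
	    zio_execute, zio, cutinline);
}

Because every dispatch now supplies a zio, the allocator-based taskq selection for ZIO_TYPE_WRITE/ZIO_TASKQ_ISSUE no longer needs the old (zio != NULL) guard, as seen in the hunk above.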
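Editor's note: the export changes drop spa_namespace_lock for most of spa_export_common(), marking the pool with spa_export_thread and waking waiters via cv_broadcast(&spa_namespace_cv) once the export finishes or fails. The matching wait side lives in spa_lookup() (spa_misc.c) and is not part of this file; below is a rough sketch of that handshake, assuming the existing spa_namespace_avl and spa_namespace_cv globals, with a hypothetical helper name.

/*
 * Sketch of the lookup-side handshake implied by spa_export_thread and
 * the cv_broadcast(&spa_namespace_cv) calls added above; illustrative
 * only, not the literal spa_misc.c code.
 */
static spa_t *
example_spa_lookup_wait(const char *name)
{
	static spa_t search;	/* spa_t is too large for the stack */
	spa_t *spa;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));

	while ((spa = avl_find(&spa_namespace_avl, &search, NULL)) != NULL &&
	    spa->spa_export_thread != NULL &&
	    spa->spa_export_thread != curthread) {
		/*
		 * Another thread is exporting this pool without holding
		 * spa_namespace_lock; sleep until it clears
		 * spa_export_thread and broadcasts spa_namespace_cv.
		 */
		cv_wait(&spa_namespace_cv, &spa_namespace_lock);
	}
	return (spa);
}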