diff options
author | Andrew Gallatin <gallatin@FreeBSD.org> | 2024-10-23 19:16:19 +0000 |
---|---|---|
committer | Andrew Gallatin <gallatin@FreeBSD.org> | 2024-10-23 19:16:19 +0000 |
commit | 81dbc22ce8b66759a9fc4ebdef5cfc7a6185af22 (patch) | |
tree | 5f9af83cf714ad6e3e417a3c64755f33ff36eafc | |
parent | f3dbef108212460489ae68b4c47e20b73984f433 (diff) | |
download | src-81dbc22ce8b6.tar.gz src-81dbc22ce8b6.zip |
mlx5e: Immediately initialize TLS send tags
Under massive connection thrashing (web server restarting), we see
long periods where the web server blocks while enabling kTLS offload
when NIC kTLS offload is enabled.
It turns out the driver uses a single-threaded Linux work queue to
serialize the commands that must be sent to the NIC to allocate and
free TLS resources. When freeing sessions, this work is handled
asynchronously. However, when allocating sessions, the work is handled
synchronously and the driver waits for the work to complete before
returning. When under massive connection thrashing, the work queue is
first filled by TLS sessions closing. Then when new sessions arrive,
the web server enables kTLS and blocks while the tens or hundreds of
thousands of session closes queued up are processed by the NIC.
Rather than using the work queue to open a TLS session on the NIC,
switch to doing the open directly. This allows us to cut in front of
all those sessions that are waiting to close, and minimize the amount
of time the web server blocks. The risk is that the NIC may be out of
resources because it has not processed all of those session frees. So
if we fail to open a session directly, we fall back to using the work
queue.
Differential Revision: https://reviews.freebsd.org/D47260
Sponsored by: Netflix
Reviewed by: kib
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c | 86 |
1 files changed, 52 insertions, 34 deletions
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c index a8522d68d5aa..c347de650250 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c @@ -213,54 +213,63 @@ mlx5e_tls_cleanup(struct mlx5e_priv *priv) counter_u64_free(ptls->stats.arg[x]); } + +static int +mlx5e_tls_st_init(struct mlx5e_priv *priv, struct mlx5e_tls_tag *ptag) +{ + int err; + + /* try to open TIS, if not present */ + if (ptag->tisn == 0) { + err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn, + priv->pdn, &ptag->tisn); + if (err) { + MLX5E_TLS_STAT_INC(ptag, tx_error, 1); + return (err); + } + } + MLX5_SET(sw_tls_cntx, ptag->crypto_params, progress.pd, ptag->tisn); + + /* try to allocate a DEK context ID */ + err = mlx5_encryption_key_create(priv->mdev, priv->pdn, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS, + MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, key.key_data), + MLX5_GET(sw_tls_cntx, ptag->crypto_params, key.key_len), + &ptag->dek_index); + if (err) { + MLX5E_TLS_STAT_INC(ptag, tx_error, 1); + return (err); + } + + MLX5_SET(sw_tls_cntx, ptag->crypto_params, param.dek_index, ptag->dek_index); + + ptag->dek_index_ok = 1; + + MLX5E_TLS_TAG_LOCK(ptag); + if (ptag->state == MLX5E_TLS_ST_INIT) + ptag->state = MLX5E_TLS_ST_SETUP; + MLX5E_TLS_TAG_UNLOCK(ptag); + return (0); +} + static void mlx5e_tls_work(struct work_struct *work) { struct mlx5e_tls_tag *ptag; struct mlx5e_priv *priv; - int err; ptag = container_of(work, struct mlx5e_tls_tag, work); priv = container_of(ptag->tls, struct mlx5e_priv, tls); switch (ptag->state) { case MLX5E_TLS_ST_INIT: - /* try to open TIS, if not present */ - if (ptag->tisn == 0) { - err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn, - priv->pdn, &ptag->tisn); - if (err) { - MLX5E_TLS_STAT_INC(ptag, tx_error, 1); - break; - } - } - MLX5_SET(sw_tls_cntx, ptag->crypto_params, progress.pd, ptag->tisn); - - /* try to allocate a DEK context ID */ - err = 
mlx5_encryption_key_create(priv->mdev, priv->pdn, - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS, - MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, key.key_data), - MLX5_GET(sw_tls_cntx, ptag->crypto_params, key.key_len), - &ptag->dek_index); - if (err) { - MLX5E_TLS_STAT_INC(ptag, tx_error, 1); - break; - } - - MLX5_SET(sw_tls_cntx, ptag->crypto_params, param.dek_index, ptag->dek_index); - - ptag->dek_index_ok = 1; - - MLX5E_TLS_TAG_LOCK(ptag); - if (ptag->state == MLX5E_TLS_ST_INIT) - ptag->state = MLX5E_TLS_ST_SETUP; - MLX5E_TLS_TAG_UNLOCK(ptag); + (void)mlx5e_tls_st_init(priv, ptag); break; case MLX5E_TLS_ST_RELEASE: /* try to destroy DEK context by ID */ if (ptag->dek_index_ok) - err = mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index); + (void)mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index); /* free tag */ mlx5e_tls_tag_zfree(ptag); @@ -441,8 +450,17 @@ mlx5e_tls_snd_tag_alloc(if_t ifp, /* reset state */ ptag->state = MLX5E_TLS_ST_INIT; - queue_work(priv->tls.wq, &ptag->work); - flush_work(&ptag->work); + /* + * Try to immediately init the tag. We may fail if the NIC's + * resources are tied up with send tags that are in the work + * queue, waiting to be freed. So if we fail, put ourselves + * on the queue so as to try again after resouces have been freed. + */ + error = mlx5e_tls_st_init(priv, ptag); + if (error != 0) { + queue_work(priv->tls.wq, &ptag->work); + flush_work(&ptag->work); + } return (0); |