author    Hans Petter Selasky <hselasky@FreeBSD.org>  2018-05-29 14:04:57 +0000
committer Hans Petter Selasky <hselasky@FreeBSD.org>  2018-05-29 14:04:57 +0000
commit    38535d6cab17b86db2806866ab9b7a2a30c1ab90 (patch)
tree      2584a59ce27b6eb2b6fff0e6c4bdd910ba0e8b76 /sys/dev
parent    9c7c97c0fff62c3d801f36bd32bb98d5189c862f (diff)
Add support for hardware rate limiting to mlx5en(4).
The hardware rate limiting feature is enabled by the RATELIMIT kernel option. Please refer to the txrtlmt option in ifconfig(8) and the SO_MAX_PACING_RATE socket option for more information. This feature is compatible with hardware TCP segmentation offload (TSO). A set of sysctl(8) knobs under dev.mce.<N>.rate_limit is provided to set up the rate limit table and to fine-tune various rate limit related parameters.

Sponsored by:	Mellanox Technologies
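For illustration, a minimal userspace sketch of enabling pacing on a socket (not part of this commit; it assumes SO_MAX_PACING_RATE takes a 32-bit value in bytes per second, which the driver converts to bits per second in mlx5e_rl_worker() below):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pace a connected TCP socket at roughly 100 Mbit/s. */
    static int
    set_pacing_rate(int fd)
    {
            uint32_t rate = 100000000 / 8;  /* bytes per second */

            if (setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
                &rate, sizeof(rate)) == -1) {
                    perror("setsockopt(SO_MAX_PACING_RATE)");
                    return (-1);
            }
            return (0);
    }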
Notes: svn path=/head/; revision=334332
Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/mlx5/driver.h                 38
-rw-r--r--  sys/dev/mlx5/mlx5_core/mlx5_main.c    18
-rw-r--r--  sys/dev/mlx5/mlx5_core/mlx5_rl.c     206
-rw-r--r--  sys/dev/mlx5/mlx5_en/en.h              8
-rw-r--r--  sys/dev/mlx5/mlx5_en/en_rl.h         174
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_main.c   34
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_rl.c   1539
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_tx.c     37
8 files changed, 2053 insertions(+), 1 deletion(-)
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index 4a82fde934e7..70e1927b7287 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -28,6 +28,8 @@
#ifndef MLX5_DRIVER_H
#define MLX5_DRIVER_H
+#include "opt_ratelimit.h"
+
#include <linux/kernel.h>
#include <linux/completion.h>
#include <linux/pci.h>
@@ -500,7 +502,11 @@ struct mlx5_core_health {
struct delayed_work recover_work;
};
+#ifdef RATELIMIT
+#define MLX5_CQ_LINEAR_ARRAY_SIZE (128 * 1024)
+#else
#define MLX5_CQ_LINEAR_ARRAY_SIZE 1024
+#endif
struct mlx5_cq_linear_array_entry {
spinlock_t lock;
@@ -540,6 +546,23 @@ struct mlx5_irq_info {
char name[MLX5_MAX_IRQ_NAME];
};
+#ifdef RATELIMIT
+struct mlx5_rl_entry {
+ u32 rate;
+ u16 burst;
+ u16 index;
+ u32 refcount;
+};
+
+struct mlx5_rl_table {
+ struct mutex rl_lock;
+ u16 max_size;
+ u32 max_rate;
+ u32 min_rate;
+ struct mlx5_rl_entry *rl_entry;
+};
+#endif
+
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
@@ -592,6 +615,9 @@ struct mlx5_priv {
struct list_head ctx_list;
spinlock_t ctx_lock;
unsigned long pci_dev_data;
+#ifdef RATELIMIT
+ struct mlx5_rl_table rl_table;
+#endif
};
enum mlx5_device_state {
@@ -1084,5 +1110,17 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
{
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#ifdef RATELIMIT
+int mlx5_init_rl_table(struct mlx5_core_dev *dev);
+void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index);
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst);
+bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst);
+
+static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
+{
+ return !!(dev->priv.rl_table.max_size);
+}
+#endif
#endif /* MLX5_DRIVER_H */
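For orientation, the table API declared above is reference counted: mlx5_rl_add_rate() either reuses an existing (rate, burst) entry or programs a free one, and mlx5_rl_remove_rate() drops the reference, clearing the hardware entry on last release. A hedged in-kernel usage sketch (hypothetical caller, not part of this commit; rates are in the firmware's 1000 bit/s units, as noted in mlx5e_rl_set_default_params() below):

    #include <dev/mlx5/driver.h>

    #ifdef RATELIMIT
    static int
    example_use_rate(struct mlx5_core_dev *dev, u32 rate_kbit, u32 burst)
    {
            u16 index;
            int err;

            if (!mlx5_rl_is_supported(dev))
                    return (-EOPNOTSUPP);
            if (!mlx5_rl_is_in_range(dev, rate_kbit, burst))
                    return (-ERANGE);

            /* take a reference; "index" identifies the rate in SQ contexts */
            err = mlx5_rl_add_rate(dev, rate_kbit, burst, &index);
            if (err)
                    return (err);

            /* ... set packet_pacing_rate_limit_index = index on a SQ ... */

            /* drop the reference when the SQ no longer uses the rate */
            mlx5_rl_remove_rate(dev, rate_kbit, burst);
            return (0);
    }
    #endif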
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c
index c7406d1413a2..25b789dc8aa4 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_main.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c
@@ -905,8 +905,23 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_init_srq_table(dev);
mlx5_init_mr_table(dev);
+#ifdef RATELIMIT
+ err = mlx5_init_rl_table(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init rate limiting\n");
+ goto err_tables_cleanup;
+ }
+#endif
return 0;
+#ifdef RATELIMIT
+err_tables_cleanup:
+ mlx5_cleanup_mr_table(dev);
+ mlx5_cleanup_srq_table(dev);
+ mlx5_cleanup_qp_table(dev);
+ mlx5_cleanup_cq_table(dev);
+#endif
+
err_eq_cleanup:
mlx5_eq_cleanup(dev);
@@ -916,6 +931,9 @@ out:
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
+#ifdef RATELIMIT
+ mlx5_cleanup_rl_table(dev);
+#endif
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_rl.c b/sys/dev/mlx5/mlx5_core/mlx5_rl.c
new file mode 100644
index 000000000000..f3d4cbecfc20
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/mlx5_rl.c
@@ -0,0 +1,206 @@
+/*-
+ * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <dev/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#ifdef RATELIMIT
+
+/* Find an entry where we can register the given rate.
+ * If the rate already exists, return the entry where it is registered;
+ * otherwise return the first available entry.
+ * If the table is full, return NULL.
+ */
+static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
+ u32 rate, u16 burst)
+{
+ struct mlx5_rl_entry *ret_entry = NULL;
+ struct mlx5_rl_entry *entry;
+ u16 i;
+
+ for (i = 0; i < table->max_size; i++) {
+ entry = table->rl_entry + i;
+ if (entry->rate == rate && entry->burst == burst)
+ return entry;
+ if (ret_entry == NULL && entry->rate == 0)
+ ret_entry = entry;
+ }
+
+ return ret_entry;
+}
+
+static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+ u32 rate, u32 burst, u16 index)
+{
+ u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+
+ MLX5_SET(set_rate_limit_in, in, opcode,
+ MLX5_CMD_OP_SET_RATE_LIMIT);
+ MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
+ MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+
+ if (MLX5_CAP_QOS(dev, packet_pacing_burst_bound))
+ MLX5_SET(set_rate_limit_in, in, burst_upper_bound, burst);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst)
+{
+ const struct mlx5_rl_table *table = &dev->priv.rl_table;
+
+ return (rate <= table->max_rate && rate >= table->min_rate &&
+ burst <= 65535);
+}
+EXPORT_SYMBOL(mlx5_rl_is_in_range);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry;
+ int err = 0;
+
+ mutex_lock(&table->rl_lock);
+
+ if (!rate || !mlx5_rl_is_in_range(dev, rate, burst)) {
+ mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
+ rate, table->min_rate, table->max_rate);
+ err = -ERANGE;
+ goto out;
+ }
+
+ entry = find_rl_entry(table, rate, burst);
+ if (!entry) {
+ mlx5_core_err(dev, "Max number of %u rates reached\n",
+ table->max_size);
+ err = -ENOSPC;
+ goto out;
+ }
+ if (entry->refcount == 0xFFFFFFFFU) {
+ /* out of refcounts */
+ err = -ENOMEM;
+ goto out;
+ } else if (entry->refcount != 0) {
+ /* rate already configured */
+ entry->refcount++;
+ } else {
+ /* new rate limit */
+ err = mlx5_set_rate_limit_cmd(dev, rate, burst, entry->index);
+ if (err) {
+ mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
+ rate, err);
+ goto out;
+ }
+ entry->rate = rate;
+ entry->burst = burst;
+ entry->refcount = 1;
+ }
+ *index = entry->index;
+
+out:
+ mutex_unlock(&table->rl_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_rl_add_rate);
+
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry = NULL;
+
+ /* 0 is a reserved value for unlimited rate */
+ if (rate == 0)
+ return;
+
+ mutex_lock(&table->rl_lock);
+ entry = find_rl_entry(table, rate, burst);
+ if (!entry || !entry->refcount) {
+ mlx5_core_warn(dev, "Rate %u is not configured\n", rate);
+ goto out;
+ }
+
+ entry->refcount--;
+ if (!entry->refcount) {
+ /* need to remove rate */
+ mlx5_set_rate_limit_cmd(dev, 0, 0, entry->index);
+ entry->rate = 0;
+ entry->burst = 0;
+ }
+
+out:
+ mutex_unlock(&table->rl_lock);
+}
+EXPORT_SYMBOL(mlx5_rl_remove_rate);
+
+int mlx5_init_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ int i;
+
+ mutex_init(&table->rl_lock);
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
+ table->max_size = 0;
+ return 0;
+ }
+
+ /* First entry is reserved for unlimited rate */
+ table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
+ table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
+ table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
+
+ table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
+ GFP_KERNEL);
+ if (!table->rl_entry)
+ return -ENOMEM;
+
+ /* The index represents the index in HW rate limit table
+ * Index 0 is reserved for unlimited rate
+ */
+ for (i = 0; i < table->max_size; i++)
+ table->rl_entry[i].index = i + 1;
+
+ return 0;
+}
+
+void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ int i;
+
+ /* Clear all configured rates */
+ for (i = 0; i < table->max_size; i++)
+ if (table->rl_entry[i].rate)
+ mlx5_set_rate_limit_cmd(dev, 0, 0,
+ table->rl_entry[i].index);
+
+ kfree(dev->priv.rl_table.rl_entry);
+}
+
+#endif
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index b5000c32eafd..9afe61389a8d 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -49,6 +49,7 @@
#include <netinet/udp.h>
#include <net/ethernet.h>
#include <sys/buf_ring.h>
+#include <sys/kthread.h>
#include "opt_rss.h"
@@ -711,6 +712,10 @@ struct mlx5e_flow_tables {
struct mlx5e_flow_table inner_rss;
};
+#ifdef RATELIMIT
+#include "en_rl.h"
+#endif
+
#define MLX5E_TSTMP_PREC 10
struct mlx5e_clbr_point {
@@ -778,6 +783,9 @@ struct mlx5e_priv {
int media_active_last;
struct callout watchdog;
+#ifdef RATELIMIT
+ struct mlx5e_rl_priv_data rl;
+#endif
struct callout tstmp_clbr;
int clbr_done;
diff --git a/sys/dev/mlx5/mlx5_en/en_rl.h b/sys/dev/mlx5/mlx5_en/en_rl.h
new file mode 100644
index 000000000000..4e2c6c539857
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/en_rl.h
@@ -0,0 +1,174 @@
+/*-
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __MLX5_EN_RL_H__
+#define __MLX5_EN_RL_H__
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/interrupt.h>
+#include <sys/unistd.h>
+
+#include <sys/queue.h>
+
+#define MLX5E_RL_MAX_WORKERS 128 /* limited by Toeplitz hash */
+#define MLX5E_RL_MAX_TX_RATES (64 * 1024) /* software limit */
+#define MLX5E_RL_DEF_SQ_PER_WORKER (12 * 1024) /* software limit */
+#define MLX5E_RL_MAX_SQS (120 * 1024) /* software limit */
+
+#define MLX5E_RL_TX_COAL_USEC_DEFAULT 32
+#define MLX5E_RL_TX_COAL_PKTS_DEFAULT 4
+#define MLX5E_RL_TX_COAL_MODE_DEFAULT 0
+#define MLX5E_RL_TX_COMP_FACT_DEFAULT 1
+
+#define MLX5E_RL_WORKER_LOCK(rlw) mtx_lock(&(rlw)->mtx)
+#define MLX5E_RL_WORKER_UNLOCK(rlw) mtx_unlock(&(rlw)->mtx)
+
+#define MLX5E_RL_RLOCK(rl) sx_slock(&(rl)->rl_sxlock)
+#define MLX5E_RL_RUNLOCK(rl) sx_sunlock(&(rl)->rl_sxlock)
+
+#define MLX5E_RL_WLOCK(rl) sx_xlock(&(rl)->rl_sxlock)
+#define MLX5E_RL_WUNLOCK(rl) sx_xunlock(&(rl)->rl_sxlock)
+
+#define MLX5E_RL_PARAMS(m) \
+ m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \
+ m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining TX packets") \
+ m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of TX packets to join") \
+ m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+ m(+1, u64 tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \
+ m(+1, u64 tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \
+ m(+1, u64 tx_worker_threads_max, "tx_worker_threads_max", "Max number of TX worker threads") \
+ m(+1, u64 tx_worker_threads_def, "tx_worker_threads_def", "Default number of TX worker threads") \
+ m(+1, u64 tx_channels_per_worker_max, "tx_channels_per_worker_max", "Max number of TX channels per worker") \
+ m(+1, u64 tx_channels_per_worker_def, "tx_channels_per_worker_def", "Default number of TX channels per worker") \
+ m(+1, u64 tx_rates_max, "tx_rates_max", "Max number of TX rates") \
+ m(+1, u64 tx_rates_def, "tx_rates_def", "Default number of TX rates") \
+ m(+1, u64 tx_limit_min, "tx_limit_min", "Minimum TX rate in bits/s") \
+ m(+1, u64 tx_limit_max, "tx_limit_max", "Maximum TX rate in bits/s") \
+ m(+1, u64 tx_burst_size, "tx_burst_size", "Current burst size in number of packets. A value of zero means use firmware default.") \
+ m(+1, u64 tx_burst_size_max, "tx_burst_size_max", "Maximum burst size in number of packets") \
+ m(+1, u64 tx_burst_size_min, "tx_burst_size_min", "Minimum burst size in number of packets")
+
+#define MLX5E_RL_PARAMS_NUM (0 MLX5E_RL_PARAMS(MLX5E_STATS_COUNT))
+
+#define MLX5E_RL_STATS(m) \
+ m(+1, u64 tx_allocate_resource_failure, "tx_allocate_resource_failure", "Number of times firmware resource allocation failed") \
+ m(+1, u64 tx_add_new_rate_failure, "tx_add_new_rate_failure", "Number of times adding a new firmware rate failed") \
+ m(+1, u64 tx_modify_rate_failure, "tx_modify_rate_failure", "Number of times modifying a firmware rate failed") \
+ m(+1, u64 tx_active_connections, "tx_active_connections", "Number of active connections") \
+ m(+1, u64 tx_open_queues, "tx_open_queues", "Number of open TX queues") \
+ m(+1, u64 tx_available_resource_failure, "tx_available_resource_failure", "Number of times TX resources were not available")
+
+#define MLX5E_RL_STATS_NUM (0 MLX5E_RL_STATS(MLX5E_STATS_COUNT))
+
+#define MLX5E_RL_TABLE_PARAMS(m) \
+ m(+1, u64 tx_limit_add, "tx_limit_add", "Add TX rate limit in bits/s to empty slot") \
+ m(+1, u64 tx_limit_clr, "tx_limit_clr", "Clear all TX rates in table") \
+ m(+1, u64 tx_allowed_deviation, "tx_allowed_deviation", "Relative rate deviation allowed in 1/1000") \
+ m(+1, u64 tx_allowed_deviation_min, "tx_allowed_deviation_min", "Minimum allowed rate deviation in 1/1000") \
+ m(+1, u64 tx_allowed_deviation_max, "tx_allowed_deviation_max", "Maximum allowed rate deviation in 1/1000")
+
+#define MLX5E_RL_TABLE_PARAMS_NUM (0 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_COUNT))
+
+#define MLX5E_RL_PARAMS_INDEX(n) \
+ (__offsetof(struct mlx5e_rl_params, n) / sizeof(uint64_t))
+
+struct mlx5e_priv;
+
+/* Indicates channel's state */
+enum {
+ MLX5E_RL_ST_FREE,
+ MLX5E_RL_ST_USED,
+ MLX5E_RL_ST_MODIFY,
+ MLX5E_RL_ST_DESTROY,
+};
+
+struct mlx5e_rl_stats {
+ u64 arg [0];
+ MLX5E_RL_STATS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_rl_params {
+ u64 arg [0];
+ MLX5E_RL_PARAMS(MLX5E_STATS_VAR)
+ u64 table_arg [0];
+ MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_rl_channel_param {
+ struct mlx5e_sq_param sq;
+ struct mlx5e_cq_param cq;
+};
+
+struct mlx5e_rl_channel {
+ struct m_snd_tag m_snd_tag;
+ STAILQ_ENTRY(mlx5e_rl_channel) entry;
+ struct mlx5e_sq * volatile sq;
+ struct mlx5e_rl_worker *worker;
+ uint64_t new_rate;
+ uint64_t init_rate;
+ uint64_t last_rate;
+ uint16_t last_burst;
+ uint16_t state;
+};
+
+struct mlx5e_rl_worker {
+ struct mtx mtx;
+ struct cv cv;
+ STAILQ_HEAD(, mlx5e_rl_channel) index_list_head;
+ STAILQ_HEAD(, mlx5e_rl_channel) process_head;
+ struct mlx5e_priv *priv;
+ struct mlx5e_rl_channel *channels;
+ unsigned worker_done;
+};
+
+struct mlx5e_rl_priv_data {
+ struct sx rl_sxlock;
+ struct sysctl_ctx_list ctx;
+ struct mlx5e_rl_channel_param chan_param;
+ struct mlx5e_rl_params param;
+ struct mlx5e_rl_stats stats;
+ struct mlx5_uar sq_uar;
+ struct mlx5e_rl_worker *workers;
+ struct mlx5e_priv *priv;
+ uint64_t *rate_limit_table;
+ unsigned opened;
+ uint32_t tisn;
+};
+
+int mlx5e_rl_init(struct mlx5e_priv *priv);
+void mlx5e_rl_cleanup(struct mlx5e_priv *priv);
+if_snd_tag_alloc_t mlx5e_rl_snd_tag_alloc;
+if_snd_tag_modify_t mlx5e_rl_snd_tag_modify;
+if_snd_tag_query_t mlx5e_rl_snd_tag_query;
+if_snd_tag_free_t mlx5e_rl_snd_tag_free;
+
+#endif /* __MLX5_EN_RL_H__ */
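A note on the m(...) lists above: they are X-macros, expanded once per use site. MLX5E_STATS_COUNT turns each entry into its +1 argument (yielding the _NUM counts), MLX5E_STATS_VAR into a struct member, and MLX5E_STATS_DESC into name/description string pairs consumed by mlx5_en_rl.c below. A simplified standalone sketch of the same technique (illustrative names, not the driver's actual helper macros):

    #include <stdint.h>
    typedef uint64_t u64;

    #define PARAMS(m) \
            m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \
            m(+1, u64 tx_rates_max, "tx_rates_max", "Max number of TX rates")

    #define COUNT(n, field, name, desc) n           /* each entry adds +1 */
    #define VAR(n, field, name, desc)   field;      /* each entry declares a member */
    #define DESC(n, field, name, desc)  name, desc, /* each entry yields two strings */

    #define PARAMS_NUM (0 PARAMS(COUNT))            /* (0 +1 +1) == 2 */

    struct params {
            u64 arg[0];     /* mirrors the driver: indexed access by offset */
            PARAMS(VAR)
    };

    static const char *params_desc[] = { PARAMS(DESC) };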
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index c2c4b0d77449..ed46451cf9ed 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -3507,6 +3507,13 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
ifp->if_capabilities |= IFCAP_LRO;
ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
+#ifdef RATELIMIT
+ ifp->if_capabilities |= IFCAP_TXRTLMT;
+ ifp->if_snd_tag_alloc = mlx5e_rl_snd_tag_alloc;
+ ifp->if_snd_tag_free = mlx5e_rl_snd_tag_free;
+ ifp->if_snd_tag_modify = mlx5e_rl_snd_tag_modify;
+ ifp->if_snd_tag_query = mlx5e_rl_snd_tag_query;
+#endif
/* set TSO limits so that we don't have to drop TX packets */
ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
@@ -3588,6 +3595,14 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
random_ether_addr(dev_addr);
if_printf(ifp, "Assigned random MAC address\n");
}
+#ifdef RATELIMIT
+ err = mlx5e_rl_init(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
+ __func__, err);
+ goto err_create_mkey;
+ }
+#endif
/* set default MTU */
mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
@@ -3673,6 +3688,10 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
return (priv);
+#ifdef RATELIMIT
+err_create_mkey:
+ mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
+#endif
err_dealloc_transport_domain:
mlx5_dealloc_transport_domain(mdev, priv->tdn);
@@ -3715,6 +3734,18 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
/* XXX wait a bit to allow IOCTL handlers to complete */
pause("W", hz);
+#ifdef RATELIMIT
+ /*
+ * The kernel can have reference(s) via the m_snd_tag's into
+ * the ratelimit channels, and these must go away before
+ * detaching:
+ */
+ while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
+ if_printf(priv->ifp, "Waiting for all ratelimit connections "
+ "to terminate\n");
+ pause("W", hz);
+ }
+#endif
/* stop watchdog timer */
callout_drain(&priv->watchdog);
@@ -3735,6 +3766,9 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
ether_ifdetach(ifp);
if_free(ifp);
+#ifdef RATELIMIT
+ mlx5e_rl_cleanup(priv);
+#endif
/* destroy all remaining sysctl nodes */
if (priv->sysctl_debug)
sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
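For context, the ifnet methods registered in mlx5e_create_ifp() above are invoked by the kernel when a connection requests pacing. A hedged sketch of the allocation call (field names taken from mlx5e_rl_snd_tag_alloc() in mlx5_en_rl.c below; the surrounding caller is hypothetical):

    #ifdef RATELIMIT
    static int
    example_alloc_snd_tag(struct ifnet *ifp, uint32_t flowid,
        uint64_t bytes_per_sec, struct m_snd_tag **ppmt)
    {
            union if_snd_tag_alloc_params params;

            memset(&params, 0, sizeof(params));
            params.rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT;
            params.rate_limit.hdr.flowid = flowid;  /* picks the worker thread */
            params.rate_limit.max_rate = bytes_per_sec;

            if (ifp->if_snd_tag_alloc == NULL)
                    return (EOPNOTSUPP);
            /* on success, *ppmt is attached to outgoing mbufs as m_pkthdr.snd_tag */
            return (ifp->if_snd_tag_alloc(ifp, &params, ppmt));
    }
    #endif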
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
new file mode 100644
index 000000000000..051420373ac7
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -0,0 +1,1539 @@
+/*-
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+#ifdef RATELIMIT
+
+static int mlx5e_rl_open_workers(struct mlx5e_priv *);
+static void mlx5e_rl_close_workers(struct mlx5e_priv *);
+static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS);
+static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x,
+ struct sysctl_oid *, const char *name, const char *desc);
+static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
+ struct sysctl_oid *node, const char *name, const char *desc);
+static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value);
+static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value);
+
+static void
+mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_sq_size);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, rl->priv->pdn);
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->wq.linear = 1;
+}
+
+static void
+mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+ uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_sq_size);
+ MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs);
+ MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts);
+
+ switch (rl->param.tx_coalesce_mode) {
+ case 0:
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ default:
+ if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe))
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+ else
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ }
+}
+
+static void
+mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl,
+ struct mlx5e_rl_channel_param *cparam)
+{
+ memset(cparam, 0, sizeof(*cparam));
+
+ mlx5e_rl_build_sq_param(rl, &cparam->sq);
+ mlx5e_rl_build_cq_param(rl, &cparam->cq);
+}
+
+static int
+mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
+ struct mlx5e_sq_param *param, int ix)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *sqc = param->sqc;
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ int err;
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */
+ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */
+ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &sq->dma_tag)))
+ goto done;
+
+ /* use shared UAR */
+ sq->uar = priv->rl.sq_uar;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
+ &sq->wq_ctrl);
+ if (err)
+ goto err_free_dma_tag;
+
+ sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
+ /*
+ * The sq->bf_buf_size variable is intentionally left zero so
+ * that the doorbell writes will occur at the same memory
+ * location.
+ */
+
+ err = mlx5e_alloc_sq_db(sq);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ sq->mkey_be = cpu_to_be32(priv->mr.key);
+ sq->ifp = priv->ifp;
+ sq->priv = priv;
+
+ return (0);
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+err_free_dma_tag:
+ bus_dma_tag_destroy(sq->dma_tag);
+done:
+ return (err);
+}
+
+static void
+mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
+{
+
+ mlx5e_free_sq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static int
+mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
+ struct mlx5e_sq_param *param, int ix)
+{
+ int err;
+
+ err = mlx5e_rl_create_sq(priv, sq, param, ix);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_sq(sq, param, priv->rl.tisn);
+ if (err)
+ goto err_destroy_sq;
+
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
+ if (err)
+ goto err_disable_sq;
+
+ return (0);
+
+err_disable_sq:
+ mlx5e_disable_sq(sq);
+err_destroy_sq:
+ mlx5e_rl_destroy_sq(sq);
+
+ return (err);
+}
+
+static void
+mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
+{
+ mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF);
+ mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF);
+
+ callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
+
+ sq->cev_factor = priv->rl.param.tx_completion_fact;
+
+ /* ensure the TX completion event factor is not zero */
+ if (sq->cev_factor == 0)
+ sq->cev_factor = 1;
+}
+
+static int
+mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix,
+ struct mlx5e_rl_channel_param *cparam,
+ struct mlx5e_sq *volatile *ppsq)
+{
+ struct mlx5e_priv *priv = rlw->priv;
+ struct mlx5e_sq *sq;
+ int err;
+
+ sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* init mutexes */
+ mlx5e_rl_chan_mtx_init(priv, sq);
+
+ /* open TX completion queue */
+ err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq,
+ &mlx5e_tx_cq_comp, eq_ix);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix);
+ if (err)
+ goto err_close_tx_cq;
+
+ /* store TX channel pointer */
+ *ppsq = sq;
+
+ /* poll TX queue initially */
+ sq->cq.mcq.comp(&sq->cq.mcq);
+
+ return (0);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&sq->cq);
+
+err_free:
+ /* destroy mutexes */
+ mtx_destroy(&sq->lock);
+ mtx_destroy(&sq->comp_lock);
+ free(sq, M_MLX5EN);
+ atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL);
+ return (err);
+}
+
+static void
+mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq)
+{
+ struct mlx5e_sq *sq = *ppsq;
+
+ /* check if channel is already closed */
+ if (sq == NULL)
+ return;
+ /* ensure channel pointer is no longer used */
+ *ppsq = NULL;
+
+ /* teardown and destroy SQ */
+ mlx5e_drain_sq(sq);
+ mlx5e_disable_sq(sq);
+ mlx5e_rl_destroy_sq(sq);
+
+ /* close CQ */
+ mlx5e_close_cq(&sq->cq);
+
+ /* destroy mutexes */
+ mtx_destroy(&sq->lock);
+ mtx_destroy(&sq->comp_lock);
+
+ free(sq, M_MLX5EN);
+}
+
+static void
+mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl)
+{
+ /*
+ * Limit the maximum distance between completion events to
+ * half of the currently set TX queue size.
+ *
+ * The maximum number of queue entries a single IP packet can
+ * consume is given by MLX5_SEND_WQE_MAX_WQEBBS.
+ *
+ * The worst-case maximum value is then given below:
+ */
+ uint64_t max = rl->param.tx_queue_size /
+ (2 * MLX5_SEND_WQE_MAX_WQEBBS);
+
+ /*
+ * Update the maximum completion factor value in case the
+ * tx_queue_size field changed. Ensure we don't overflow
+ * 16-bits.
+ */
+ if (max < 1)
+ max = 1;
+ else if (max > 65535)
+ max = 65535;
+ rl->param.tx_completion_fact_max = max;
+
+ /*
+ * Verify that the current TX completion factor is within the
+ * given limits:
+ */
+ if (rl->param.tx_completion_fact < 1)
+ rl->param.tx_completion_fact = 1;
+ else if (rl->param.tx_completion_fact > max)
+ rl->param.tx_completion_fact = max;
+}
+
+static int
+mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index)
+{
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
+ MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY);
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
+
+ err = mlx5_core_modify_sq(mdev, in, inlen);
+
+ kvfree(in);
+
+ return (err);
+}
+
+/*
+ * This function searches the configured rate limit table for the
+ * best match, to prevent a single socket-based application from
+ * allocating all the available hardware rates. If the user-selected
+ * rate deviates too much from the closest rate available in the rate
+ * limit table, the unlimited rate is selected.
+ */
+static uint64_t
+mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate)
+{
+ uint64_t distance = -1ULL;
+ uint64_t diff;
+ uint64_t retval = 0; /* unlimited */
+ uint64_t x;
+
+ /* search for closest rate */
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ uint64_t rate = rl->rate_limit_table[x];
+ if (rate == 0)
+ continue;
+
+ if (rate > user_rate)
+ diff = rate - user_rate;
+ else
+ diff = user_rate - rate;
+
+ /* check if distance is smaller than previous rate */
+ if (diff < distance) {
+ distance = diff;
+ retval = rate;
+ }
+ }
+
+ /* range check for multiplication below */
+ if (user_rate > rl->param.tx_limit_max)
+ user_rate = rl->param.tx_limit_max;
+
+ /* fallback to unlimited, if rate deviates too much */
+ if (distance > howmany(user_rate *
+ rl->param.tx_allowed_deviation, 1000ULL))
+ retval = 0;
+
+ return (retval);
+}
+
+/*
+ * This function sets the requested rate for a rate limit channel, in
+ * bits per second. The requested rate will be filtered through the
+ * find best rate function above.
+ */
+static int
+mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
+ struct mlx5e_rl_channel *channel, uint64_t rate)
+{
+ struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
+ struct mlx5e_sq *sq;
+ uint64_t temp;
+ uint16_t index;
+ uint16_t burst;
+ int error;
+
+ if (rate != 0) {
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ MLX5E_RL_RLOCK(rl);
+
+ /* get current burst size in bytes */
+ temp = rl->param.tx_burst_size *
+ MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);
+
+ /* limit burst size to 64K currently */
+ if (temp > 65535)
+ temp = 65535;
+ burst = temp;
+
+ /* find best rate */
+ rate = mlx5e_rl_find_best_rate_locked(rl, rate);
+
+ MLX5E_RL_RUNLOCK(rl);
+
+ if (rate == 0) {
+ /* rate doesn't exist, fallback to unlimited */
+ error = EINVAL;
+ index = 0;
+ rate = 0;
+ atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
+ } else {
+ /* get a reference on the new rate */
+ error = -mlx5_rl_add_rate(rlw->priv->mdev,
+ howmany(rate, 1000), burst, &index);
+
+ if (error != 0) {
+ /* adding rate failed, fallback to unlimited */
+ index = 0;
+ rate = 0;
+ atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
+ }
+ }
+ MLX5E_RL_WORKER_LOCK(rlw);
+ } else {
+ index = 0;
+ burst = 0; /* default */
+ }
+
+ /* atomically swap rates */
+ temp = channel->last_rate;
+ channel->last_rate = rate;
+ rate = temp;
+
+ /* atomically swap burst size */
+ temp = channel->last_burst;
+ channel->last_burst = burst;
+ burst = temp;
+
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ /* put reference on the old rate, if any */
+ if (rate != 0) {
+ mlx5_rl_remove_rate(rlw->priv->mdev,
+ howmany(rate, 1000), burst);
+ }
+
+ /* set new rate */
+ sq = channel->sq;
+ if (sq != NULL) {
+ error = mlx5e_rl_modify_sq(sq, index);
+ if (error != 0)
+ atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
+ } else
+ error = 0;
+ MLX5E_RL_WORKER_LOCK(rlw);
+
+ return (-error);
+}
+
+static void
+mlx5e_rl_worker(void *arg)
+{
+ struct thread *td;
+ struct mlx5e_rl_worker *rlw = arg;
+ struct mlx5e_rl_channel *channel;
+ struct mlx5e_priv *priv;
+ unsigned ix;
+ uint64_t x;
+ int error;
+
+ /* set thread priority */
+ td = curthread;
+
+ thread_lock(td);
+ sched_prio(td, PI_SWI(SWI_NET));
+ thread_unlock(td);
+
+ priv = rlw->priv;
+
+ /* compute completion vector */
+ ix = (rlw - priv->rl.workers) %
+ priv->mdev->priv.eq_table.num_comp_vectors;
+
+ /* TODO bind to CPU */
+
+ /* open all the SQs */
+ MLX5E_RL_WORKER_LOCK(rlw);
+ for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
+ struct mlx5e_rl_channel *channel = rlw->channels + x;
+
+#if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
+ if (channel->state == MLX5E_RL_ST_FREE)
+ continue;
+#endif
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ MLX5E_RL_RLOCK(&priv->rl);
+ error = mlx5e_rl_open_channel(rlw, ix,
+ &priv->rl.chan_param, &channel->sq);
+ MLX5E_RL_RUNLOCK(&priv->rl);
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rl_open_channel failed: %d\n", error);
+ break;
+ }
+ mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
+ }
+ while (1) {
+ if (STAILQ_FIRST(&rlw->process_head) == NULL) {
+ /* check if we are tearing down */
+ if (rlw->worker_done != 0)
+ break;
+ cv_wait(&rlw->cv, &rlw->mtx);
+ }
+ /* check if we are tearing down */
+ if (rlw->worker_done != 0)
+ break;
+ channel = STAILQ_FIRST(&rlw->process_head);
+ if (channel != NULL) {
+ STAILQ_REMOVE_HEAD(&rlw->process_head, entry);
+
+ switch (channel->state) {
+ case MLX5E_RL_ST_MODIFY:
+ channel->state = MLX5E_RL_ST_USED;
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ /* create channel on demand */
+ if (channel->sq == NULL) {
+ MLX5E_RL_RLOCK(&priv->rl);
+ error = mlx5e_rl_open_channel(rlw, ix,
+ &priv->rl.chan_param, &channel->sq);
+ MLX5E_RL_RUNLOCK(&priv->rl);
+
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rl_open_channel failed: %d\n", error);
+ } else {
+ atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
+ }
+ } else {
+ mlx5e_resume_sq(channel->sq);
+ }
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ /* convert from bytes/s to bits/s and set new rate */
+ error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
+ channel->new_rate * 8ULL);
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
+ error);
+ }
+ break;
+
+ case MLX5E_RL_ST_DESTROY:
+ error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
+ error);
+ }
+ if (channel->sq != NULL) {
+ /*
+ * Make sure all packets are
+ * transmitted before SQ is
+ * returned to free list:
+ */
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ mlx5e_drain_sq(channel->sq);
+ MLX5E_RL_WORKER_LOCK(rlw);
+ }
+ /* put the channel back into the free list */
+ STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
+ channel->state = MLX5E_RL_ST_FREE;
+ atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
+ break;
+ default:
+ /* NOP */
+ break;
+ }
+ }
+ }
+
+ /* close all the SQs */
+ for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
+ struct mlx5e_rl_channel *channel = rlw->channels + x;
+
+ /* update the initial rate */
+ channel->init_rate = channel->last_rate;
+
+ /* make sure we free up the rate resource */
+ mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
+
+ if (channel->sq != NULL) {
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ mlx5e_rl_close_channel(&channel->sq);
+ atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
+ MLX5E_RL_WORKER_LOCK(rlw);
+ }
+ }
+
+ rlw->worker_done = 0;
+ cv_broadcast(&rlw->cv);
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ kthread_exit();
+}
+
+static int
+mlx5e_rl_open_tis(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, prio, 0);
+ MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
+
+ return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
+}
+
+static void
+mlx5e_rl_close_tis(struct mlx5e_priv *priv)
+{
+ mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
+}
+
+static void
+mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
+ struct mlx5_core_dev *mdev)
+{
+ /* ratelimit workers */
+ param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
+ param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;
+
+ /* range check */
+ if (param->tx_worker_threads_def == 0 ||
+ param->tx_worker_threads_def > param->tx_worker_threads_max)
+ param->tx_worker_threads_def = param->tx_worker_threads_max;
+
+ /* ratelimit channels */
+ param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
+ param->tx_worker_threads_def;
+ param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;
+
+ /* range check */
+ if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
+ param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;
+
+ /* set default burst size */
+ param->tx_burst_size = 4; /* MTUs */
+
+ /*
+ * Set maximum burst size
+ *
+ * The burst size is multiplied by the MTU and clamped to the
+ * range 0 ... 65535 bytes inclusively before being fed to the
+ * firmware.
+ *
+ * NOTE: If the burst size or MTU is changed, only ratelimit
+ * connections made after the change will use the new burst
+ * size.
+ */
+ param->tx_burst_size_max = 255;
+
+ /* get firmware rate limits in 1000bit/s and convert them to bit/s */
+ param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
+ param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;
+
+ /* ratelimit table size */
+ param->tx_rates_max = mdev->priv.rl_table.max_size;
+
+ /* range check */
+ if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
+ param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;
+
+ /* set default number of rates */
+ param->tx_rates_def = param->tx_rates_max;
+
+ /* set maximum allowed rate deviation */
+ if (param->tx_limit_max != 0) {
+ /*
+ * Make sure the deviation multiplication doesn't
+ * overflow unsigned 64-bit:
+ */
+ param->tx_allowed_deviation_max = -1ULL /
+ param->tx_limit_max;
+ }
+ /* set default rate deviation */
+ param->tx_allowed_deviation = 50; /* 5.0% */
+
+ /* channel parameters */
+ param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
+ param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
+ param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
+ param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
+}
+
+static const char *mlx5e_rl_params_desc[] = {
+ MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_rl_table_params_desc[] = {
+ MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_rl_stats_desc[] = {
+ MLX5E_RL_STATS(MLX5E_STATS_DESC)
+};
+
+int
+mlx5e_rl_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ struct sysctl_oid *node;
+ struct sysctl_oid *stats;
+ char buf[64];
+ uint64_t i;
+ uint64_t j;
+ int error;
+
+ /* check if there is support for packet pacing */
+ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
+ return (0);
+
+ rl->priv = priv;
+
+ sysctl_ctx_init(&rl->ctx);
+
+ sx_init(&rl->rl_sxlock, "ratelimit-sxlock");
+
+ /* allocate shared UAR for SQs */
+ error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar);
+ if (error)
+ goto done;
+
+ /* open own TIS domain for ratelimit SQs */
+ error = mlx5e_rl_open_tis(priv);
+ if (error)
+ goto err_uar;
+
+ /* setup default value for parameters */
+ mlx5e_rl_set_default_params(&rl->param, priv->mdev);
+
+ /* update the completion factor */
+ mlx5e_rl_sync_tx_completion_fact(rl);
+
+ /* create root node */
+ node = SYSCTL_ADD_NODE(&rl->ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+ "rate_limit", CTLFLAG_RW, NULL, "Rate limiting support");
+
+ if (node != NULL) {
+ /* create SYSCTLs */
+ for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
+ mlx5e_rl_sysctl_add_u64_oid(rl,
+ MLX5E_RL_PARAMS_INDEX(arg[i]),
+ node, mlx5e_rl_params_desc[2 * i],
+ mlx5e_rl_params_desc[2 * i + 1]);
+ }
+
+ stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
+ OID_AUTO, "stats", CTLFLAG_RD, NULL,
+ "Rate limiting statistics");
+ if (stats != NULL) {
+ /* create SYSCTLs */
+ for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
+ mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
+ stats, mlx5e_rl_stats_desc[2 * i],
+ mlx5e_rl_stats_desc[2 * i + 1]);
+ }
+ }
+ }
+
+ /* allocate workers array */
+ rl->workers = malloc(sizeof(rl->workers[0]) *
+ rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* allocate rate limit array */
+ rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
+ rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);
+
+ if (node != NULL) {
+ /* create more SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
+ "A", "Show table of all configured TX rates");
+
+ /* try to fetch rate table from kernel environment */
+ for (i = 0; i != rl->param.tx_rates_def; i++) {
+ /* compute path for tunable */
+ snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
+ device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
+ if (TUNABLE_QUAD_FETCH(buf, &j))
+ mlx5e_rl_tx_limit_add(rl, j);
+ }
+
+ /* setup rate table sysctls */
+ for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
+ mlx5e_rl_sysctl_add_u64_oid(rl,
+ MLX5E_RL_PARAMS_INDEX(table_arg[i]),
+ node, mlx5e_rl_table_params_desc[2 * i],
+ mlx5e_rl_table_params_desc[2 * i + 1]);
+ }
+ }
+
+ for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + j;
+
+ rlw->priv = priv;
+
+ cv_init(&rlw->cv, "mlx5-worker-cv");
+ mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
+ STAILQ_INIT(&rlw->index_list_head);
+ STAILQ_INIT(&rlw->process_head);
+
+ rlw->channels = malloc(sizeof(rlw->channels[0]) *
+ rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
+ struct mlx5e_rl_channel *channel = rlw->channels + i;
+ channel->worker = rlw;
+ channel->m_snd_tag.ifp = priv->ifp;
+ STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ }
+
+ PRIV_LOCK(priv);
+ error = mlx5e_rl_open_workers(priv);
+ PRIV_UNLOCK(priv);
+
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rl_open_workers failed: %d\n", error);
+ }
+
+ return (0);
+
+err_uar:
+ mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
+done:
+ sysctl_ctx_free(&rl->ctx);
+ sx_destroy(&rl->rl_sxlock);
+ return (error);
+}
+
+static int
+mlx5e_rl_open_workers(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ struct thread *rl_thread = NULL;
+ struct proc *rl_proc = NULL;
+ uint64_t j;
+ int error;
+
+ if (priv->gone || rl->opened)
+ return (-EINVAL);
+
+ MLX5E_RL_WLOCK(rl);
+ /* compute channel parameters once */
+ mlx5e_rl_build_channel_param(rl, &rl->chan_param);
+ MLX5E_RL_WUNLOCK(rl);
+
+ for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + j;
+
+ /* start worker thread */
+ error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
+ RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
+ if (error != 0) {
+ if_printf(rl->priv->ifp,
+ "kproc_kthread_add failed: %d\n", error);
+ rlw->worker_done = 1;
+ }
+ }
+
+ rl->opened = 1;
+
+ return (0);
+}
+
+static void
+mlx5e_rl_close_workers(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ uint64_t y;
+
+ if (rl->opened == 0)
+ return;
+
+ /* tear down worker threads simultaneously */
+ for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ /* tear down worker before freeing SQs */
+ MLX5E_RL_WORKER_LOCK(rlw);
+ if (rlw->worker_done == 0) {
+ rlw->worker_done = 1;
+ cv_broadcast(&rlw->cv);
+ } else {
+ /* XXX thread not started */
+ rlw->worker_done = 0;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ }
+
+ /* wait for worker threads to exit */
+ for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ /* tear down worker before freeing SQs */
+ MLX5E_RL_WORKER_LOCK(rlw);
+ while (rlw->worker_done != 0)
+ cv_wait(&rlw->cv, &rlw->mtx);
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ }
+
+ rl->opened = 0;
+}
+
+static void
+mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
+{
+ unsigned x;
+
+ MLX5E_RL_WLOCK(rl);
+ for (x = 0; x != rl->param.tx_rates_def; x++)
+ rl->rate_limit_table[x] = 0;
+ MLX5E_RL_WUNLOCK(rl);
+}
+
+void
+mlx5e_rl_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ uint64_t y;
+
+ /* check if there is support for packet pacing */
+ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
+ return;
+
+ sysctl_ctx_free(&rl->ctx);
+
+ PRIV_LOCK(priv);
+ mlx5e_rl_close_workers(priv);
+ PRIV_UNLOCK(priv);
+
+ mlx5e_rl_reset_rates(rl);
+
+ /* free shared UAR for SQs */
+ mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
+
+ /* close TIS domain */
+ mlx5e_rl_close_tis(priv);
+
+ for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ cv_destroy(&rlw->cv);
+ mtx_destroy(&rlw->mtx);
+ free(rlw->channels, M_MLX5EN);
+ }
+ free(rl->rate_limit_table, M_MLX5EN);
+ free(rl->workers, M_MLX5EN);
+ sx_destroy(&rl->rl_sxlock);
+}
+
+static void
+mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
+ struct mlx5e_rl_channel *channel)
+{
+ STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
+ cv_broadcast(&rlw->cv);
+}
+
+static void
+mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
+{
+ if (channel == NULL)
+ return;
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ switch (channel->state) {
+ case MLX5E_RL_ST_MODIFY:
+ channel->state = MLX5E_RL_ST_DESTROY;
+ break;
+ case MLX5E_RL_ST_USED:
+ channel->state = MLX5E_RL_ST_DESTROY;
+ mlx5e_rlw_queue_channel_locked(rlw, channel);
+ break;
+ default:
+ break;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+}
+
+static int
+mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
+{
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ channel->new_rate = rate;
+ switch (channel->state) {
+ case MLX5E_RL_ST_USED:
+ channel->state = MLX5E_RL_ST_MODIFY;
+ mlx5e_rlw_queue_channel_locked(rlw, channel);
+ break;
+ default:
+ break;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ return (0);
+}
+
+static int
+mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t *prate)
+{
+ int retval;
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ switch (channel->state) {
+ case MLX5E_RL_ST_USED:
+ *prate = channel->last_rate;
+ retval = 0;
+ break;
+ case MLX5E_RL_ST_MODIFY:
+ retval = EBUSY;
+ break;
+ default:
+ retval = EINVAL;
+ break;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ return (retval);
+}
+
+static int
+mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
+ struct mlx5e_rl_channel **pchannel)
+{
+ struct mlx5e_rl_channel *channel;
+ int retval = ENOMEM;
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ /* Check for available channel in free list */
+ if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) {
+ retval = 0;
+ /* Remove head index from available list */
+ STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
+ channel->state = MLX5E_RL_ST_USED;
+ atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
+ } else {
+ atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ *pchannel = channel;
+#ifdef RATELIMIT_DEBUG
+ if_printf(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel);
+#endif
+ return (retval);
+}
+
+int
+mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct mlx5e_rl_channel *channel;
+ struct mlx5e_rl_worker *rlw;
+ struct mlx5e_priv *priv;
+ int error;
+
+ priv = ifp->if_softc;
+
+ /* check if there is support for packet pacing or if device is going away */
+ if (!MLX5_CAP_GEN(priv->mdev, qos) ||
+ !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
+ params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
+ return (EOPNOTSUPP);
+
+ /* compute which worker thread this TCP connection belongs to */
+ rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) %
+ priv->rl.param.tx_worker_threads_def);
+
+ error = mlx5e_find_available_tx_ring_index(rlw, &channel);
+ if (error != 0)
+ goto done;
+
+ error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
+ if (error != 0) {
+ mlx5e_rl_free(rlw, channel);
+ goto done;
+ }
+
+ /* store pointer to mbuf tag */
+ *ppmt = &channel->m_snd_tag;
+done:
+ return (error);
+}
+
+
+int
+mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
+{
+ struct mlx5e_rl_channel *channel =
+ container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
+
+ return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate));
+}
+
+int
+mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
+{
+ struct mlx5e_rl_channel *channel =
+ container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
+
+ return (mlx5e_rl_query(channel->worker, channel, &params->rate_limit.max_rate));
+}
+
+void
+mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
+{
+ struct mlx5e_rl_channel *channel =
+ container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
+
+ mlx5e_rl_free(channel->worker, channel);
+}
+
+static int
+mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_rl_priv_data *rl = arg1;
+ struct mlx5e_priv *priv = rl->priv;
+ struct sbuf sbuf;
+ unsigned x;
+ int error;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
+ PRIV_LOCK(priv);
+
+ sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);
+
+ sbuf_printf(&sbuf,
+ "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
+ "\t" "--------------------------------------------\n");
+
+ MLX5E_RL_RLOCK(rl);
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] == 0)
+ continue;
+
+ sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
+ x, (unsigned)rl->param.tx_burst_size,
+ (long long)rl->rate_limit_table[x]);
+ }
+ MLX5E_RL_RUNLOCK(rl);
+
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+
+ PRIV_UNLOCK(priv);
+
+ return (error);
+}
+
+static int
+mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
+{
+ uint64_t x;
+ uint64_t y;
+
+ MLX5E_RL_WLOCK(rl);
+ /* compute channel parameters once */
+ mlx5e_rl_build_channel_param(rl, &rl->chan_param);
+ MLX5E_RL_WUNLOCK(rl);
+
+ for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
+ struct mlx5e_rl_channel *channel;
+ struct mlx5e_sq *sq;
+
+ channel = rlw->channels + x;
+ sq = channel->sq;
+
+ if (sq == NULL)
+ continue;
+
+ if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
+ mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
+ rl->param.tx_coalesce_usecs,
+ rl->param.tx_coalesce_pkts,
+ rl->param.tx_coalesce_mode);
+ } else {
+ mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
+ rl->param.tx_coalesce_usecs,
+ rl->param.tx_coalesce_pkts);
+ }
+ }
+ }
+ return (0);
+}
+
+static int
+mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
+{
+ unsigned x;
+ int error;
+
+ if (value < 1000 ||
+ mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0)
+ return (EINVAL);
+
+ MLX5E_RL_WLOCK(rl);
+ error = ENOMEM;
+
+ /* check if rate already exists */
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] != value)
+ continue;
+ error = EEXIST;
+ break;
+ }
+
+ /* check if there is a free rate entry */
+ if (x == rl->param.tx_rates_def) {
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] != 0)
+ continue;
+ rl->rate_limit_table[x] = value;
+ error = 0;
+ break;
+ }
+ }
+ MLX5E_RL_WUNLOCK(rl);
+
+ return (error);
+}
+
+static int
+mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value)
+{
+ unsigned x;
+ int error;
+
+ if (value == 0)
+ return (EINVAL);
+
+ MLX5E_RL_WLOCK(rl);
+
+ /* check if rate already exists */
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] != value)
+ continue;
+ /* free up rate */
+ rl->rate_limit_table[x] = 0;
+ break;
+ }
+
+ /* check if there is a free rate entry */
+ if (x == rl->param.tx_rates_def)
+ error = ENOENT;
+ else
+ error = 0;
+ MLX5E_RL_WUNLOCK(rl);
+
+ return (error);
+}
+
+static int
+mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_rl_priv_data *rl = arg1;
+ struct mlx5e_priv *priv = rl->priv;
+ unsigned mode_modify;
+ unsigned was_opened;
+ uint64_t value;
+ uint64_t old;
+ int error;
+
+ PRIV_LOCK(priv);
+
+ MLX5E_RL_RLOCK(rl);
+ value = rl->param.arg[arg2];
+ MLX5E_RL_RUNLOCK(rl);
+
+ if (req != NULL) {
+ old = value;
+ error = sysctl_handle_64(oidp, &value, 0, req);
+ if (error || req->newptr == NULL ||
+ value == rl->param.arg[arg2])
+ goto done;
+ } else {
+ old = 0;
+ error = 0;
+ }
+
+ /* check if device is gone */
+ if (priv->gone) {
+ error = ENXIO;
+ goto done;
+ }
+ was_opened = rl->opened;
+ mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);
+
+ switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
+ case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
+ if (value > rl->param.tx_worker_threads_max)
+ value = rl->param.tx_worker_threads_max;
+ else if (value < 1)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
+ if (value > rl->param.tx_channels_per_worker_max)
+ value = rl->param.tx_channels_per_worker_max;
+ else if (value < 1)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
+ if (value > rl->param.tx_rates_max)
+ value = rl->param.tx_rates_max;
+ else if (value < 1)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
+ /* range check */
+ if (value < 1)
+ value = 0;
+ else if (value > MLX5E_FLD_MAX(cqc, cq_period))
+ value = MLX5E_FLD_MAX(cqc, cq_period);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* avoid bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_rl_refresh_channel_params(rl);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
+ /* import TX coal pkts */
+ if (value < 1)
+ value = 0;
+ else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
+ value = MLX5E_FLD_MAX(cqc, cq_max_count);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* avoid bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_rl_refresh_channel_params(rl);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
+ /* network interface must be down */
+ if (was_opened != 0 && mode_modify == 0)
+ mlx5e_rl_close_workers(priv);
+
+ /* import TX coalesce mode */
+ if (value != 0)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* restart network interface, if any */
+ if (was_opened != 0) {
+ if (mode_modify == 0)
+ mlx5e_rl_open_workers(priv);
+ else
+ error = mlx5e_rl_refresh_channel_params(rl);
+ }
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_rl_close_workers(priv);
+
+ /* import TX queue size */
+ if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
+ value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ else if (value > priv->params_ethtool.tx_queue_size_max)
+ value = priv->params_ethtool.tx_queue_size_max;
+
+ /* store actual TX queue size */
+ value = 1ULL << order_base_2(value);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* verify TX completion factor */
+ mlx5e_rl_sync_tx_completion_fact(rl);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_rl_open_workers(priv);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_rl_close_workers(priv);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* verify parameter */
+ mlx5e_rl_sync_tx_completion_fact(rl);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_rl_open_workers(priv);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
+ error = mlx5e_rl_tx_limit_add(rl, value);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
+ error = mlx5e_rl_tx_limit_clr(rl, value);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
+ /* range check */
+ if (value > rl->param.tx_allowed_deviation_max)
+ value = rl->param.tx_allowed_deviation_max;
+ else if (value < rl->param.tx_allowed_deviation_min)
+ value = rl->param.tx_allowed_deviation_min;
+
+ MLX5E_RL_WLOCK(rl);
+ rl->param.arg[arg2] = value;
+ MLX5E_RL_WUNLOCK(rl);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
+ /* range check */
+ if (value > rl->param.tx_burst_size_max)
+ value = rl->param.tx_burst_size_max;
+ else if (value < rl->param.tx_burst_size_min)
+ value = rl->param.tx_burst_size_min;
+
+ MLX5E_RL_WLOCK(rl);
+ rl->param.arg[arg2] = value;
+ MLX5E_RL_WUNLOCK(rl);
+ break;
+
+ default:
+ break;
+ }
+done:
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+static void
+mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
+ struct sysctl_oid *node, const char *name, const char *desc)
+{
+ /*
+ * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
+ * take care of loading default sysctl value from the kernel
+ * environment, if any:
+ */
+ if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) {
+ /* read-only SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ name, CTLTYPE_U64 | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
+ } else {
+ if (strstr(name, "_def") != 0) {
+#ifdef RATELIMIT_DEBUG
+ /* tunable read-only advanced SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ name, CTLTYPE_U64 | CTLFLAG_RDTUN |
+ CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
+#endif
+ } else {
+ /* read-write SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ name, CTLTYPE_U64 | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
+ }
+ }
+}
+
+static void
+mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
+ struct sysctl_oid *node, const char *name, const char *desc)
+{
+ /* read-only SYSCTLs */
+ SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
+ CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
+}
+
+#endif
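The rate table knobs created above can also be driven programmatically; for example, populating one slot through tx_limit_add (a hypothetical userspace snippet assuming device unit 0; per mlx5e_rl_tx_limit_add() the value is in bits per second and must be at least 1000):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint64_t rate = 100000000ULL;   /* 100 Mbit/s, in bits per second */

            /* add the rate to the dev.mce.0.rate_limit table */
            if (sysctlbyname("dev.mce.0.rate_limit.tx_limit_add",
                NULL, NULL, &rate, sizeof(rate)) == -1) {
                    perror("sysctlbyname");
                    return (1);
            }
            return (0);
    }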
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
index b15d2c2128a1..085cdcbb1e0b 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -103,6 +103,25 @@ mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
ch = priv->params.num_channels;
+#ifdef RATELIMIT
+ if (mb->m_pkthdr.snd_tag != NULL) {
+ struct mlx5e_sq *sq;
+
+ /* check for route change */
+ if (mb->m_pkthdr.snd_tag->ifp != ifp)
+ return (NULL);
+
+ /* get pointer to sendqueue */
+ sq = container_of(mb->m_pkthdr.snd_tag,
+ struct mlx5e_rl_channel, m_snd_tag)->sq;
+
+ /* check if valid */
+ if (sq != NULL && sq->stopped == 0)
+ return (sq);
+
+ /* FALLTHROUGH */
+ }
+#endif
/* check if flowid is set */
if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
@@ -540,8 +559,24 @@ mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
sq = mlx5e_select_queue(ifp, mb);
if (unlikely(sq == NULL)) {
- /* Invalid send queue */
+#ifdef RATELIMIT
+ /* Check for route change */
+ if (mb->m_pkthdr.snd_tag != NULL &&
+ mb->m_pkthdr.snd_tag->ifp != ifp) {
+ /* Free mbuf */
+ m_freem(mb);
+
+ /*
+ * Tell upper layers about route change and to
+ * re-transmit this packet:
+ */
+ return (EAGAIN);
+ }
+#endif
+ /* Free mbuf */
m_freem(mb);
+
+ /* Invalid send queue */
return (ENXIO);
}
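A closing note on the transmit contract above: when the send tag's interface no longer matches (a route change), mlx5e_xmit() frees the mbuf and returns EAGAIN so the upper layer can re-allocate a tag and retransmit. A hedged caller-side sketch (hypothetical transport code; it uses the if_snd_tag_free method registered in mlx5e_create_ifp()):

    /* Hypothetical transport-side handling of a paced transmit: */
    error = (ifp->if_transmit)(ifp, mb);    /* the mbuf is consumed on error */
    if (error == EAGAIN) {
            /* Route changed: release the stale tag and let the next
             * transmit allocate a fresh one via if_snd_tag_alloc().
             */
            tag->ifp->if_snd_tag_free(tag);
            tag = NULL;
    }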