diff options
| author | Bartosz Sobczak <bartosz.sobczak@intel.com> | 2026-03-13 11:56:25 +0000 |
|---|---|---|
| committer | Krzysztof Galazka <kgalazka@FreeBSD.org> | 2026-03-13 12:00:55 +0000 |
| commit | 5b7aa6c7bc9db19e8bd34a5b7892fb5df2a3068b (patch) | |
| tree | a8a0e3bc62a4290abc97d238023721f4fde56b07 | |
| parent | 13ee84c591f8df7553fc8e3dac7e92409046f4d2 (diff) | |
irdma(4): update irdma to version 1.3.56-k
Update Intel irdma driver to version 1.3.56-k
Notable changes:
- adding E830 support
- adding E835 support
Signed-off-by: Sobczak, Bartosz <bartosz.sobczak@intel.com>
Reviewed by: Andrew Zhu <anzhu@netapp.com>
Tested by: Mateusz Moga <mateusz.moga@intel.com>
MFC after: 2 weeks
Sponsored by: Intel Corporation
Differential Revision: https://reviews.freebsd.org/D55479
37 files changed, 2225 insertions, 1234 deletions
diff --git a/contrib/ofed/libirdma/ice_devids.h b/contrib/ofed/libirdma/ice_devids.h index 57a7f2f7c2af..0cf7aa6aee22 100644 --- a/contrib/ofed/libirdma/ice_devids.h +++ b/contrib/ofed/libirdma/ice_devids.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2019 - 2020 Intel Corporation + * Copyright (c) 2019 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -88,4 +88,28 @@ #define ICE_DEV_ID_E822L_10G_BASE_T 0x1899 /* Intel(R) Ethernet Connection E822-L 1GbE */ #define ICE_DEV_ID_E822L_SGMII 0x189A +/* Intel(R) Ethernet Controller E830-CC for backplane */ +#define ICE_DEV_ID_E830_BACKPLANE 0x12D1 +/* Intel(R) Ethernet Controller E830-CC for QSFP */ +#define ICE_DEV_ID_E830_QSFP56 0x12D2 +/* Intel(R) Ethernet Controller E830-CC for SFP */ +#define ICE_DEV_ID_E830_SFP 0x12D3 +/* Intel(R) Ethernet Controller E830-CC for SFP-DD */ +#define ICE_DEV_ID_E830_SFP_DD 0x12D4 +/* Intel(R) Ethernet Controller E830-C for backplane */ +#define ICE_DEV_ID_E830C_BACKPLANE 0x12D5 +/* Intel(R) Ethernet Controller E830-XXV for backplane */ +#define ICE_DEV_ID_E830_XXV_BACKPLANE 0x12DC +/* Intel(R) Ethernet Controller E830-C for QSFP */ +#define ICE_DEV_ID_E830C_QSFP 0x12D8 +/* Intel(R) Ethernet Controller E830-XXV for QSFP */ +#define ICE_DEV_ID_E830_XXV_QSFP 0x12DD +/* Intel(R) Ethernet Controller E830-C for SFP */ +#define ICE_DEV_ID_E830C_SFP 0x12DA +/* Intel(R) Ethernet Controller E830-XXV for SFP */ +#define ICE_DEV_ID_E830_XXV_SFP 0x12DE +/* Intel(R) Ethernet Controller E835-XXV for SFP */ +#define ICE_DEV_ID_E835_XXV_SFP 0x124A +/* Intel(R) Ethernet Controller E835-CC for QSFP */ +#define ICE_DEV_ID_E835_QSFP 0x1249 #endif /* ICE_DEVIDS_H */ diff --git a/contrib/ofed/libirdma/irdma.h b/contrib/ofed/libirdma/irdma.h index f4a5a4796f82..6b85ff1a7105 100644 --- a/contrib/ofed/libirdma/irdma.h +++ b/contrib/ofed/libirdma/irdma.h 
@@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2022 Intel Corporation + * Copyright (c) 2017 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -57,6 +57,7 @@ struct irdma_uk_attrs { u32 max_hw_wq_quanta; u32 min_hw_cq_size; u32 max_hw_cq_size; + u16 max_hw_push_len; u16 max_hw_sq_chunk; u16 min_hw_wq_size; u8 hw_rev; diff --git a/contrib/ofed/libirdma/irdma_defs.h b/contrib/ofed/libirdma/irdma_defs.h index 39d4e7772c31..7deaf762c204 100644 --- a/contrib/ofed/libirdma/irdma_defs.h +++ b/contrib/ofed/libirdma/irdma_defs.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -63,6 +63,27 @@ #define IRDMA_BYTE_200 200 #define IRDMA_BYTE_208 208 #define IRDMA_BYTE_216 216 +#define IRDMA_BYTE_224 224 +#define IRDMA_BYTE_232 232 +#define IRDMA_BYTE_240 240 +#define IRDMA_BYTE_248 248 +#define IRDMA_BYTE_256 256 +#define IRDMA_BYTE_264 264 +#define IRDMA_BYTE_272 272 +#define IRDMA_BYTE_280 280 +#define IRDMA_BYTE_288 288 +#define IRDMA_BYTE_296 296 +#define IRDMA_BYTE_304 304 +#define IRDMA_BYTE_312 312 +#define IRDMA_BYTE_320 320 +#define IRDMA_BYTE_328 328 +#define IRDMA_BYTE_336 336 +#define IRDMA_BYTE_344 344 +#define IRDMA_BYTE_352 352 +#define IRDMA_BYTE_360 360 +#define IRDMA_BYTE_368 368 +#define IRDMA_BYTE_376 376 +#define IRDMA_BYTE_384 384 #define IRDMA_QP_TYPE_IWARP 1 #define IRDMA_QP_TYPE_UDA 2 @@ -81,6 +102,8 @@ #define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 +#define IRDMA_DEFAULT_MAX_PUSH_LEN 8192 + #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 @@ -241,7 +264,7 @@ #define IRDMAQPSQ_DESTQPN_S 32 #define 
IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32) #define IRDMAQPSQ_AHID_S 0 -#define IRDMAQPSQ_AHID GENMASK_ULL(16, 0) +#define IRDMAQPSQ_AHID GENMASK_ULL(24, 0) #define IRDMAQPSQ_INLINEDATAFLAG_S 57 #define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57) @@ -338,9 +361,9 @@ #define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \ { \ u32 size; \ - size = (_ring).size; \ + size = IRDMA_RING_SIZE(_ring); \ if (!IRDMA_RING_FULL_ERR(_ring)) { \ - (_ring).head = ((_ring).head + 1) % size; \ + IRDMA_RING_CURRENT_HEAD(_ring) = (IRDMA_RING_CURRENT_HEAD(_ring) + 1) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = ENOSPC; \ @@ -349,79 +372,40 @@ #define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ { \ u32 size; \ - size = (_ring).size; \ + size = IRDMA_RING_SIZE(_ring); \ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \ - (_ring).head = ((_ring).head + (_count)) % size; \ - (_retcode) = 0; \ - } else { \ - (_retcode) = ENOSPC; \ - } \ - } -#define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ - { \ - u32 size; \ - size = (_ring).size; \ - if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \ - (_ring).head = ((_ring).head + 1) % size; \ - (_retcode) = 0; \ - } else { \ - (_retcode) = ENOSPC; \ - } \ - } -#define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ - { \ - u32 size; \ - size = (_ring).size; \ - if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \ - (_ring).head = ((_ring).head + (_count)) % size; \ + IRDMA_RING_CURRENT_HEAD(_ring) = (IRDMA_RING_CURRENT_HEAD(_ring) + (_count)) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = ENOSPC; \ } \ } -#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ - (_ring).head = ((_ring).head + (_count)) % (_ring).size -#define IRDMA_RING_MOVE_TAIL(_ring) \ - (_ring).tail = ((_ring).tail + 1) % (_ring).size +#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ + (IRDMA_RING_CURRENT_HEAD(_ring) = (IRDMA_RING_CURRENT_HEAD(_ring) + (_count)) % IRDMA_RING_SIZE(_ring)) #define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) 
\ - (_ring).head = ((_ring).head + 1) % (_ring).size + IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, 1) #define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ - (_ring).tail = ((_ring).tail + (_count)) % (_ring).size + IRDMA_RING_CURRENT_TAIL(_ring) = (IRDMA_RING_CURRENT_TAIL(_ring) + (_count)) % IRDMA_RING_SIZE(_ring) + +#define IRDMA_RING_MOVE_TAIL(_ring) \ + IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, 1) #define IRDMA_RING_SET_TAIL(_ring, _pos) \ - (_ring).tail = (_pos) % (_ring).size + IRDMA_RING_CURRENT_TAIL(_ring) = (_pos) % IRDMA_RING_SIZE(_ring) #define IRDMA_RING_FULL_ERR(_ring) \ ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ - ) - -#define IRDMA_ERR_RING_FULL2(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \ - ) - -#define IRDMA_ERR_RING_FULL3(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 3)) \ + (IRDMA_RING_USED_QUANTA(_ring) == (IRDMA_RING_SIZE(_ring) - 1)) \ ) #define IRDMA_SQ_RING_FULL_ERR(_ring) \ ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 257)) \ + (IRDMA_RING_USED_QUANTA(_ring) == (IRDMA_RING_SIZE(_ring) - 257)) \ ) -#define IRDMA_ERR_SQ_RING_FULL2(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \ - ) -#define IRDMA_ERR_SQ_RING_FULL3(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \ - ) #define IRDMA_RING_MORE_WORK(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) != 0) \ @@ -429,17 +413,17 @@ #define IRDMA_RING_USED_QUANTA(_ring) \ ( \ - (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \ + ((IRDMA_RING_CURRENT_HEAD(_ring) + IRDMA_RING_SIZE(_ring) - IRDMA_RING_CURRENT_TAIL(_ring)) % IRDMA_RING_SIZE(_ring)) \ ) #define IRDMA_RING_FREE_QUANTA(_ring) \ ( \ - ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \ + (IRDMA_RING_SIZE(_ring) - IRDMA_RING_USED_QUANTA(_ring) - 1) \ ) #define IRDMA_SQ_RING_FREE_QUANTA(_ring) \ ( \ - ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \ + (IRDMA_RING_SIZE(_ring) - 
IRDMA_RING_USED_QUANTA(_ring) - 257) \ ) #define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ diff --git a/contrib/ofed/libirdma/irdma_uk.c b/contrib/ofed/libirdma/irdma_uk.c index 115c5f0a27f0..c42d0f3e9673 100644 --- a/contrib/ofed/libirdma/irdma_uk.c +++ b/contrib/ofed/libirdma/irdma_uk.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -133,16 +133,18 @@ irdma_nop_1(struct irdma_qp_uk *qp) void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx) { - __le64 *wqe; + struct irdma_qp_quanta *sq; u32 wqe_idx; if (!(qp_wqe_idx & 0x7F)) { wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; - wqe = qp->sq_base[wqe_idx].elem; + sq = qp->sq_base + wqe_idx; if (wqe_idx) - memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); + memset(sq, qp->swqe_polarity ? 0 : 0xFF, + 128 * sizeof(*sq)); else - memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000); + memset(sq, qp->swqe_polarity ? 
0xFF : 0, + 128 * sizeof(*sq)); } } @@ -200,22 +202,65 @@ irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) qp->push_dropped = false; } +/** + * irdma_qp_push_wqe - setup push wqe and ring db + * @qp: hw qp ptr + * @wqe: wqe ptr + * @quanta: number of quanta in wqe + * @wqe_idx: wqe index + * @push_wqe: whether to use push for the wqe + */ void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 * wqe, u16 quanta, - u32 wqe_idx, bool post_sq) + u32 wqe_idx, bool push_wqe) { __le64 *push; - if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != - IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && - !qp->push_mode) { - irdma_uk_qp_post_wr(qp); - } else { + if (push_wqe) { push = (__le64 *) ((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); irdma_memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); irdma_qp_ring_push_db(qp, wqe_idx); + qp->last_push_db = true; + } else if (qp->last_push_db) { + qp->last_push_db = false; + db_wr32(qp->qp_id, qp->wqe_alloc_db); + } else { + irdma_uk_qp_post_wr(qp); + } +} + +/** + * irdma_push_ring_free - check if sq ring is free to post push wqe + * @qp: hw qp ptr + */ +static inline bool +irdma_push_ring_free(struct irdma_qp_uk *qp) +{ + u32 head, tail; + + head = IRDMA_RING_CURRENT_HEAD(qp->initial_ring); + tail = IRDMA_RING_CURRENT_TAIL(qp->sq_ring); + + if (head == tail || head == (tail + 1)) + return true; + + return false; +} + +/** + * irdma_enable_push_wqe - decide whether to use push, depending on sq ring state and total size + * @qp: hw qp ptr + * @total_size: total data size + */ +static inline bool +irdma_enable_push_wqe(struct irdma_qp_uk *qp, u32 total_size) +{ + if (irdma_push_ring_free(qp) && + total_size <= qp->uk_attrs->max_hw_push_len) { + return true; } + return false; } /** @@ -234,7 +279,8 @@ irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, __le64 *wqe; __le64 *wqe_0 = NULL; u32 nop_wqe_idx; - u16 avail_quanta, wqe_quanta = *quanta; + u16 wqe_quanta = *quanta; + u16 avail_quanta; u16 i; avail_quanta = qp->uk_attrs->max_hw_sq_chunk - @@ -330,7 +376,7 
@@ irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = false; op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) @@ -350,11 +396,13 @@ irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; + if (qp->push_db) + info->push_wqe = irdma_enable_push_wqe(qp, total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; - qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); @@ -399,8 +447,8 @@ irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -429,7 +477,7 @@ irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, u16 quanta; u64 hdr; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe &= qp->push_db ? true : false; op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) @@ -451,7 +499,6 @@ irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, qp->ord_cnt = 0; } - qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; addl_frag_cnt = op_info->num_lo_sges > 1 ? 
(op_info->num_lo_sges - 1) : 0; local_fence |= info->local_fence; @@ -490,8 +537,8 @@ irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -517,7 +564,7 @@ irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = false; op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) @@ -534,6 +581,9 @@ irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; + if (qp->push_db) + info->push_wqe = irdma_enable_push_wqe(qp, total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; @@ -587,8 +637,8 @@ irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -780,11 +830,11 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, return EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; - qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; read_fence |= info->read_fence; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); @@ -812,8 +862,8 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, 
set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -886,8 +936,8 @@ irdma_uk_inline_send(struct irdma_qp_uk *qp, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -937,8 +987,8 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -989,8 +1039,8 @@ irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -1226,26 +1276,25 @@ irdma_check_rq_cqe(struct irdma_qp_uk *qp, u32 *array_idx) } /** - * irdma_skip_duplicate_flush_cmpl - check last cmpl and update wqe if needed - * - * @ring: sq/rq ring - * @flush_seen: information if flush for specific ring was already seen - * @comp_status: completion status - * @wqe_idx: new value of WQE index returned if there is more work on ring + * irdma_uk_cq_empty - Check if CQ is empty + * @cq: hw cq */ -static inline int -irdma_skip_duplicate_flush_cmpl(struct irdma_ring ring, u8 flush_seen, - enum irdma_cmpl_status comp_status, - u32 *wqe_idx) +bool +irdma_uk_cq_empty(struct irdma_cq_uk *cq) { - if (flush_seen) { - if (IRDMA_RING_MORE_WORK(ring)) - *wqe_idx = ring.tail; - else - return ENOENT; - } + __le64 *cqe; + u8 polarity; + u64 qword3; - 
return 0; + if (cq->avoid_mem_cflct) + cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); + else + cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); + + get_64bit_val(cqe, 24, &qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + + return polarity != cq->polarity; } /** @@ -1338,6 +1387,10 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); qp = (struct irdma_qp_uk *)(irdma_uintptr) comp_ctx; + if (!qp || qp->destroy_pending) { + ret_code = EFAULT; + goto exit; + } if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); @@ -1367,10 +1420,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); - if (!qp || qp->destroy_pending) { - ret_code = EFAULT; - goto exit; - } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); info->qp_handle = (irdma_qp_handle) (irdma_uintptr) qp; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); @@ -1378,51 +1427,44 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, if (info->q_type == IRDMA_CQE_QTYPE_RQ) { u32 array_idx; - ret_code = irdma_skip_duplicate_flush_cmpl(qp->rq_ring, - qp->rq_flush_seen, - info->comp_status, - &wqe_idx); - if (ret_code != 0) - goto exit; - array_idx = wqe_idx / qp->rq_wqe_size_multiplier; + info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); + info->signaled = 1; + + if (qword3 & IRDMACQ_STAG) { + info->stag_invalid_set = true; + info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); + } else { + info->stag_invalid_set = false; + } if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { + ret_code = pthread_spin_lock(qp->lock); + if (ret_code) + return ret_code; if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { ret_code = ENOENT; + pthread_spin_unlock(qp->lock); goto exit; } 
info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail]; - info->signaled = 1; - array_idx = qp->rq_ring.tail; + IRDMA_RING_SET_TAIL(qp->rq_ring, qp->rq_ring.tail + 1); + if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) + qp->rq_flush_complete = true; + else + move_cq_head = false; + pthread_spin_unlock(qp->lock); } else { info->wr_id = qp->rq_wrid_array[array_idx]; - info->signaled = 1; if (irdma_check_rq_cqe(qp, &array_idx)) { info->wr_id = qp->rq_wrid_array[array_idx]; info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); return 0; } - } - - info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); - - if (qword3 & IRDMACQ_STAG) { - info->stag_invalid_set = true; - info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); - } else { - info->stag_invalid_set = false; - } - IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); - if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { - qp->rq_flush_seen = true; - if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) - qp->rq_flush_complete = true; - else - move_cq_head = false; + IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); } pring = &qp->rq_ring; } else { /* q_type is IRDMA_CQE_QTYPE_SQ */ @@ -1444,12 +1486,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, qp->push_mode = false; qp->push_dropped = true; } - ret_code = irdma_skip_duplicate_flush_cmpl(qp->sq_ring, - qp->sq_flush_seen, - info->comp_status, - &wqe_idx); - if (ret_code != 0) - goto exit; if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; info->signaled = qp->sq_wrtrk_array[wqe_idx].signaled; @@ -1459,10 +1495,9 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, IRDMA_RING_SET_TAIL(qp->sq_ring, wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); } else { - if (pthread_spin_lock(qp->lock)) { - ret_code = ENOENT; - goto exit; - } + ret_code = pthread_spin_lock(qp->lock); + if (ret_code) + return ret_code; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { pthread_spin_unlock(qp->lock); ret_code = 
ENOENT; @@ -1493,7 +1528,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, if (info->op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) info->minor_err = FLUSH_MW_BIND_ERR; - qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) qp->sq_flush_complete = true; pthread_spin_unlock(qp->lock); @@ -1508,6 +1542,7 @@ exit: if (pring && IRDMA_RING_MORE_WORK(*pring)) move_cq_head = false; } + if (move_cq_head) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) @@ -1522,8 +1557,9 @@ exit: IRDMA_RING_MOVE_TAIL(cq->cq_ring); if (!cq->avoid_mem_cflct && ext_valid) IRDMA_RING_MOVE_TAIL(cq->cq_ring); - set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, - IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); + if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) & 0x3F || irdma_uk_cq_empty(cq)) + set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, + IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); } else { qword3 &= ~IRDMA_CQ_WQEIDX; qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail); @@ -1537,9 +1573,7 @@ exit: * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ -static int -irdma_round_up_wq(u32 wqdepth) -{ +static u64 irdma_round_up_wq(u64 wqdepth) { int scount = 1; for (wqdepth--; scount <= 16; scount *= 2) @@ -1588,15 +1622,16 @@ irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)sq_size << shift) + IRDMA_SQ_RSVD); - *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); - - if (*sqdepth < min_size) - *sqdepth = min_size; - else if (*sqdepth > uk_attrs->max_hw_wq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_wq_quanta) return EINVAL; + *sqdepth = hw_quanta; return 0; } @@ 
-1607,15 +1642,16 @@ irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *s int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; - - *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)rq_size << shift) + IRDMA_RQ_RSVD); - if (*rqdepth < min_size) - *rqdepth = min_size; - else if (*rqdepth > uk_attrs->max_hw_rq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_rq_quanta) return EINVAL; + *rqdepth = hw_quanta; return 0; } diff --git a/contrib/ofed/libirdma/irdma_umain.c b/contrib/ofed/libirdma/irdma_umain.c index e8d27c31a0dc..63b082a5aa2b 100644 --- a/contrib/ofed/libirdma/irdma_umain.c +++ b/contrib/ofed/libirdma/irdma_umain.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -48,7 +48,7 @@ /** * Driver version */ -char libirdma_version[] = "1.2.36-k"; +char libirdma_version[] = "1.3.56-k"; unsigned int irdma_dbg; @@ -87,6 +87,18 @@ static const struct hca_info hca_table[] = { INTEL_HCA(ICE_DEV_ID_E822L_SFP), INTEL_HCA(ICE_DEV_ID_E822L_10G_BASE_T), INTEL_HCA(ICE_DEV_ID_E822L_SGMII), + INTEL_HCA(ICE_DEV_ID_E830_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E830_QSFP56), + INTEL_HCA(ICE_DEV_ID_E830_SFP), + INTEL_HCA(ICE_DEV_ID_E830_SFP_DD), + INTEL_HCA(ICE_DEV_ID_E830C_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E830_XXV_BACKPLANE), + INTEL_HCA(ICE_DEV_ID_E830C_QSFP), + INTEL_HCA(ICE_DEV_ID_E830_XXV_QSFP), + INTEL_HCA(ICE_DEV_ID_E830C_SFP), + INTEL_HCA(ICE_DEV_ID_E830_XXV_SFP), + INTEL_HCA(ICE_DEV_ID_E835_XXV_SFP), + INTEL_HCA(ICE_DEV_ID_E835_QSFP), }; static struct ibv_context_ops irdma_ctx_ops = { @@ -239,7 +251,7 @@ irdma_driver_init(const char *uverbs_sys_path, hca_size = sizeof(hca_table) / sizeof(struct hca_info); while (i < hca_size && !device_found) { - if (device_id != hca_table[i].device) + if (device_id == hca_table[i].device) device_found = 1; ++i; } diff --git a/contrib/ofed/libirdma/irdma_user.h b/contrib/ofed/libirdma/irdma_user.h index aeb6aa9feebd..c9f707380c59 100644 --- a/contrib/ofed/libirdma/irdma_user.h +++ b/contrib/ofed/libirdma/irdma_user.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -94,12 +94,10 @@ enum irdma_device_caps_const { IRDMA_MIN_IW_QP_ID = 0, IRDMA_QUERY_FPM_BUF_SIZE = 176, IRDMA_COMMIT_FPM_BUF_SIZE = 176, - IRDMA_MAX_IW_QP_ID = 262143, IRDMA_MIN_CEQID = 0, IRDMA_MAX_CEQID = 1023, IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1, IRDMA_MIN_CQID = 0, - IRDMA_MAX_CQID = 524287, IRDMA_MIN_AEQ_ENTRIES = 1, IRDMA_MAX_AEQ_ENTRIES = 524287, IRDMA_MIN_CEQ_ENTRIES = 1, @@ -188,7 +186,7 @@ struct irdma_cq_uk_init_info; struct irdma_ring { volatile u32 head; - volatile u32 tail; /* effective tail */ + volatile u32 tail; u32 size; }; @@ -327,6 +325,7 @@ struct irdma_wqe_uk_ops { struct irdma_bind_window *op_info); }; +bool irdma_uk_cq_empty(struct irdma_cq_uk *cq); int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info); void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, @@ -364,6 +363,8 @@ struct irdma_qp_uk { __le64 *shadow_area; __le32 *push_db; __le64 *push_wqe; + void *push_db_map; + void *push_wqe_map; struct irdma_ring sq_ring; struct irdma_ring sq_sig_ring; struct irdma_ring rq_ring; @@ -393,12 +394,11 @@ struct irdma_qp_uk { bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ + bool last_push_db:1; /* Indicates last DB was push DB */ void *back_qp; pthread_spinlock_t *lock; u8 dbg_rq_flushed; u16 ord_cnt; - u8 sq_flush_seen; - u8 rq_flush_seen; u8 rd_fence_rate; }; @@ -462,9 +462,11 @@ int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift); -int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth); -int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 
rq_size, u8 shift, u32 *rqdepth); +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, + u8 shift, u32 *sqdepth); +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, + u8 shift, u32 *rqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, - u32 wqe_idx, bool post_sq); + u32 wqe_idx, bool push_wqe); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); #endif /* IRDMA_USER_H */ diff --git a/contrib/ofed/libirdma/irdma_uverbs.c b/contrib/ofed/libirdma/irdma_uverbs.c index e52ce1cfa229..aee904a087bf 100644 --- a/contrib/ofed/libirdma/irdma_uverbs.c +++ b/contrib/ofed/libirdma/irdma_uverbs.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (C) 2019 - 2023 Intel Corporation + * Copyright (C) 2019 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -221,7 +221,7 @@ irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access) { struct irdma_urereg_mr cmd = {}; - struct ibv_rereg_mr_resp resp; + struct ibv_rereg_mr_resp resp = {}; cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr, @@ -258,7 +258,7 @@ irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) { struct ibv_mw *mw; struct ibv_alloc_mw cmd; - struct ibv_alloc_mw_resp resp; + struct ibv_alloc_mw_resp resp = {}; int err; mw = calloc(1, sizeof(*mw)); @@ -344,11 +344,11 @@ irdma_udealloc_mw(struct ibv_mw *mw) } static void * -irdma_alloc_hw_buf(size_t size) +irdma_calloc_hw_buf_sz(size_t size, size_t alignment) { void *buf; - buf = memalign(IRDMA_HW_PAGE_SIZE, size); + buf = memalign(alignment, size); if (!buf) return NULL; @@ -356,10 +356,17 @@ irdma_alloc_hw_buf(size_t size) free(buf); return NULL; } + memset(buf, 0, size); return buf; } +static void * +irdma_calloc_hw_buf(size_t size) +{ + 
return irdma_calloc_hw_buf_sz(size, IRDMA_HW_PAGE_SIZE); +} + static void irdma_free_hw_buf(void *buf, size_t size) { @@ -376,11 +383,14 @@ irdma_free_hw_buf(void *buf, size_t size) static inline int get_cq_size(int ncqe, u8 hw_rev) { - ncqe++; + + ncqe += 2; /* Completions with immediate require 1 extra entry */ if (hw_rev > IRDMA_GEN_1) ncqe *= 2; + if (ncqe & 1) + ncqe += 1; /* cq size must be an even number */ if (ncqe < IRDMA_U_MINCQ_SIZE) ncqe = IRDMA_U_MINCQ_SIZE; @@ -460,13 +470,12 @@ ucreate_cq(struct ibv_context *context, total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE; iwucq->buf_size = total_size; - info.cq_base = irdma_alloc_hw_buf(total_size); + info.cq_base = irdma_calloc_hw_buf(total_size); if (!info.cq_base) { ret = ENOMEM; goto err_cq_base; } - memset(info.cq_base, 0, total_size); reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; reg_mr_cmd.cq_pages = cq_pages; @@ -481,13 +490,12 @@ ucreate_cq(struct ibv_context *context, iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { - info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE); + info.shadow_area = irdma_calloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE); if (!info.shadow_area) { ret = ENOMEM; goto err_alloc_shadow; } - memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE); reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; reg_mr_shadow_cmd.cq_pages = 1; @@ -621,16 +629,9 @@ irdma_udestroy_cq(struct ibv_cq *cq) iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); uk_attrs = &iwvctx->uk_attrs; - - ret = pthread_spin_destroy(&iwucq->lock); - if (ret) - goto err; - - irdma_process_resize_list(iwucq, NULL); ret = ibv_cmd_destroy_cq(cq); if (ret) - goto err; - + return ret; ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); @@ -638,11 +639,15 @@ irdma_udestroy_cq(struct ibv_cq *cq) 
ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); } + + irdma_process_resize_list(iwucq, NULL); + + ret = pthread_spin_destroy(&iwucq->lock); + if (ret) + return ret; + free(iwucq); return 0; - -err: - return ret; } static enum ibv_wc_status @@ -786,6 +791,10 @@ irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe) if (ib_qp->qp_type == IBV_QPT_UD) { entry->src_qp = cur_cqe->ud_src_qpn; +#define IRDMA_PKT_TYPE_ROCE_V2_IPV4 1 +#define IRDMA_PKT_TYPE_ROCE_V2_IPV6 2 + entry->sl = cur_cqe->ipv4 ? IRDMA_PKT_TYPE_ROCE_V2_IPV4 : + IRDMA_PKT_TYPE_ROCE_V2_IPV6; entry->wc_flags |= IBV_WC_GRH; } else { entry->src_qp = cur_cqe->qp_id; @@ -1257,9 +1266,9 @@ irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp) return ret; if (iwuqp->qp.push_db) - irdma_munmap(iwuqp->qp.push_db); + irdma_munmap(iwuqp->qp.push_db_map); if (iwuqp->qp.push_wqe) - irdma_munmap(iwuqp->qp.push_wqe); + irdma_munmap(iwuqp->qp.push_wqe_map); ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); @@ -1286,18 +1295,28 @@ irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd, struct irdma_ucreate_qp_resp resp = {}; struct irdma_ureg_mr reg_mr_cmd = {}; struct ibv_reg_mr_resp reg_mr_resp = {}; + struct irdma_uvcontext *iwvctx; int ret; + long os_pgsz = IRDMA_HW_PAGE_SIZE; sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE); rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE); totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE; - info->sq = irdma_alloc_hw_buf(totalqpsize); - iwuqp->buf_size = totalqpsize; + iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); + /* adjust alignment for iwarp */ + if (iwvctx->ibv_ctx.device->transport_type == + IBV_TRANSPORT_IWARP) { + long pgsz = sysconf(_SC_PAGESIZE); + + if (pgsz > 0) + os_pgsz = pgsz; + } + info->sq = irdma_calloc_hw_buf_sz(totalqpsize, os_pgsz); if (!info->sq) return ENOMEM; - memset(info->sq, 0, totalqpsize); 
+ iwuqp->buf_size = totalqpsize; info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE]; info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem; @@ -1516,7 +1535,6 @@ irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE && iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) { u64 offset; - void *map; int ret; ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, @@ -1533,22 +1551,19 @@ irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) return ret; offset = resp.push_wqe_mmap_key; - map = irdma_mmap(qp->context->cmd_fd, offset); - if (map == MAP_FAILED) + iwuqp->qp.push_wqe_map = irdma_mmap(qp->context->cmd_fd, offset); + if (iwuqp->qp.push_wqe_map == MAP_FAILED) return ret; - iwuqp->qp.push_wqe = map; - offset = resp.push_db_mmap_key; - map = irdma_mmap(qp->context->cmd_fd, offset); - if (map == MAP_FAILED) { - irdma_munmap(iwuqp->qp.push_wqe); - iwuqp->qp.push_wqe = NULL; + iwuqp->qp.push_db_map = irdma_mmap(qp->context->cmd_fd, offset); + if (iwuqp->qp.push_db_map == MAP_FAILED) { + irdma_munmap(iwuqp->qp.push_wqe_map); printf("failed to map push page, errno %d\n", errno); return ret; } - iwuqp->qp.push_wqe += resp.push_offset; - iwuqp->qp.push_db = map + resp.push_offset; + iwuqp->qp.push_wqe = iwuqp->qp.push_wqe_map + resp.push_offset; + iwuqp->qp.push_db = iwuqp->qp.push_db_map + resp.push_offset; return ret; } else { @@ -1766,7 +1781,10 @@ irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false); break; case IBV_WR_BIND_MW: - if (ib_qp->qp_type != IBV_QPT_RC) { + if (ib_qp->qp_type != IBV_QPT_RC || + (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1 && + ib_wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_ZERO_BASED)) { err = EINVAL; break; } @@ -1785,16 +1803,14 @@ irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, info.op.bind_window.mw_stag = 
calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; - } - if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { + if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; - info.op.bind_window.va = NULL; - } else { + else info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED; - info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; - } + + info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length; info.op.bind_window.ena_reads = (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; @@ -1845,7 +1861,7 @@ irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, struct irdma_post_rq_info post_recv = {}; struct irdma_uqp *iwuqp; bool reflush = false; - int err = 0; + int err; iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); err = pthread_spin_lock(&iwuqp->lock); @@ -2005,12 +2021,10 @@ irdma_uresize_cq(struct ibv_cq *cq, int cqe) cq_size = get_cq_total_bytes(cqe_needed); cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT; - cq_base = irdma_alloc_hw_buf(cq_size); + cq_base = irdma_calloc_hw_buf(cq_size); if (!cq_base) return ENOMEM; - memset(cq_base, 0, cq_size); - cq_buf = malloc(sizeof(*cq_buf)); if (!cq_buf) { ret = ENOMEM; diff --git a/sys/dev/irdma/fbsd_kcompat.c b/sys/dev/irdma/fbsd_kcompat.c index 32a9bdb4f969..4888c4ddd5ad 100644 --- a/sys/dev/irdma/fbsd_kcompat.c +++ b/sys/dev/irdma/fbsd_kcompat.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2023 Intel Corporation + * Copyright (c) 2021 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -36,6 +36,7 @@ #include "ice_rdma.h" #include "irdma_di_if.h" #include "irdma_main.h" +#include "icrdma_hw.h" #include <sys/gsb_crc32.h> #include <netinet/in_fib.h> #include <netinet6/in6_fib.h> @@ -44,6 +45,11 @@ /* additional QP debuging option. Keep false unless needed */ bool irdma_upload_context = false; +u8 irdma_sysctl_max_ord = ICRDMA_MAX_ORD_SIZE; +u8 irdma_sysctl_max_ird = ICRDMA_MAX_IRD_SIZE; +u8 irdma_rdpu_bw_tun = 0; + +static void irdma_modify_rdpu_bw(struct irdma_pci_f *rf); inline u32 irdma_rd32(struct irdma_dev_ctx *dev_ctx, u32 reg){ @@ -566,6 +572,9 @@ irdma_set_rf_user_cfg_params(struct irdma_pci_f *rf) rf->rst_to = IRDMA_RST_TIMEOUT_HZ; /* Enable DCQCN algorithm by default */ rf->dcqcn_ena = true; + + if (irdma_fw_major_ver(&rf->sc_dev) == 2 && irdma_rdpu_bw_tun) + irdma_modify_rdpu_bw(rf); } /** @@ -593,12 +602,87 @@ irdma_sysctl_dcqcn_update(SYSCTL_HANDLER_ARGS) return 0; } +static void +irdma_modify_rdpu_bw(struct irdma_pci_f *rf) +{ + u32 val; +#define GL_RDPU_CNTRL 0x00052054 + + val = rd32(&rf->hw, GL_RDPU_CNTRL); + printf("pf%d Read GL_RDPU_CNTRL[%x] = 0x%08X", + if_getdunit(rf->peer_info->ifp), GL_RDPU_CNTRL, val); + + /* Clear the load balancing bit */ + val &= ~(0x1 << 2); + wr32(&rf->hw, GL_RDPU_CNTRL, val); + val = rd32(&rf->hw, GL_RDPU_CNTRL); + printf("pf%d Set GL_RDPU_CNTRL[%x] = 0x%08X", + if_getdunit(rf->peer_info->ifp), GL_RDPU_CNTRL, val); +} + +enum irdma_qos_info { + IRDMA_QOS_DSCP_MAP = 1, + IRDMA_QOS_DSCP_MODE, + IRDMA_QOS_PRIO_TYPE, + IRDMA_QOS_QS_HANDLE, + IRDMA_QOS_REL_BW, + IRDMA_QOS_TC, + IRDMA_QOS_UP2TC +}; + enum irdma_cqp_stats_info { IRDMA_CQP_REQ_CMDS = 28, IRDMA_CQP_CMPL_CMDS = 29 }; static int +irdma_sysctl_qos(SYSCTL_HANDLER_ARGS) +{ + struct irdma_sc_vsi *vsi = (struct irdma_sc_vsi *)arg1; + char rslt[192] = "no vsi available yet"; + int rslt_size = sizeof(rslt) - 1; + int option = (int)arg2; + int a; + + if (!vsi) { + return 
sysctl_handle_string(oidp, rslt, sizeof(rslt), req); + + } + + snprintf(rslt, sizeof(rslt), ""); + switch (option) { + case IRDMA_QOS_PRIO_TYPE: + for (a = 0; a < IRDMA_MAX_USER_PRIORITY; a++) + snprintf(rslt, rslt_size, "%s %02x", rslt, vsi->qos[a].prio_type); + break; + case IRDMA_QOS_REL_BW: + for (a = 0; a < IRDMA_MAX_USER_PRIORITY; a++) + snprintf(rslt, rslt_size, "%s %d", rslt, vsi->qos[a].rel_bw); + break; + case IRDMA_QOS_QS_HANDLE: + for (a = 0; a < IRDMA_MAX_USER_PRIORITY; a++) + snprintf(rslt, rslt_size, "%s %d", rslt, vsi->qos[a].qs_handle); + break; + case IRDMA_QOS_TC: + for (a = 0; a < IRDMA_MAX_USER_PRIORITY; a++) + snprintf(rslt, rslt_size, "%s %d", rslt, vsi->qos[a].traffic_class); + break; + case IRDMA_QOS_UP2TC: + for (a = 0; a < IRDMA_MAX_USER_PRIORITY; a++) + snprintf(rslt, rslt_size, "%s %d", rslt, vsi->cfg_check[a].traffic_class); + break; + case IRDMA_QOS_DSCP_MAP: + for (a = 0; a < sizeof(vsi->dscp_map); a++) + snprintf(rslt, rslt_size, "%s%02x", rslt, vsi->dscp_map[a]); + break; + case IRDMA_QOS_DSCP_MODE: + snprintf(rslt, rslt_size, "%d", vsi->dscp_mode); + } + + return sysctl_handle_string(oidp, rslt, sizeof(rslt), req); +} + +static int irdma_sysctl_cqp_stats(SYSCTL_HANDLER_ARGS) { struct irdma_sc_cqp *cqp = (struct irdma_sc_cqp *)arg1; @@ -854,6 +938,7 @@ void irdma_sysctl_settings(struct irdma_pci_f *rf) { struct sysctl_oid_list *irdma_sysctl_oid_list; + u8 ird_ord_limit; irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree); @@ -861,6 +946,79 @@ irdma_sysctl_settings(struct irdma_pci_f *rf) OID_AUTO, "upload_context", CTLFLAG_RWTUN, &irdma_upload_context, 0, "allow for generating QP's upload context, default=0"); + + if (rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + return; + +#define ICRDMA_HW_IRD_ORD_LIMIT 128 + SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, + OID_AUTO, "ord_max_value", CTLFLAG_RDTUN, + &irdma_sysctl_max_ord, ICRDMA_MAX_ORD_SIZE, + "Limit Outbound RDMA Read Queue Depth, 
dflt=32, max=128"); + + SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, + OID_AUTO, "ird_max_value", CTLFLAG_RDTUN, + &irdma_sysctl_max_ird, ICRDMA_MAX_IRD_SIZE, + "Limit Inbound RDMA Read Queue Depth, dflt=32, max=128"); + /* + * Ensure the ird/ord is equal and not more than ICRDMA_HW_IRD_ORD_LIMIT + */ + ird_ord_limit = min(irdma_sysctl_max_ord, irdma_sysctl_max_ird); + if (ird_ord_limit > ICRDMA_HW_IRD_ORD_LIMIT) + ird_ord_limit = ICRDMA_HW_IRD_ORD_LIMIT; + irdma_sysctl_max_ird = ird_ord_limit; + irdma_sysctl_max_ord = ird_ord_limit; + + SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, + OID_AUTO, "mod_rdpu_bw", CTLFLAG_RDTUN, + &irdma_rdpu_bw_tun, 0, + "Turn off RDPU BW balance, default=0"); +} + +/** + * irdma_qos_info_tunables_init - init tunables to read qos settings + * @rf: RDMA PCI function + */ +void +irdma_qos_info_tunables_init(struct irdma_pci_f *rf) +{ + struct irdma_sc_vsi *vsi = &rf->iwdev->vsi; + struct sysctl_oid_list *qos_oid_list; + + qos_oid_list = SYSCTL_CHILDREN(rf->tun_info.qos_sysctl_tree); + SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, + OID_AUTO, "vsi_rel_bw", CTLFLAG_RD, + &vsi->qos_rel_bw, 0, + "qos_rel_bw"); + SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, + OID_AUTO, "vsi_prio_type", CTLFLAG_RD, + &vsi->qos_prio_type, 0, "vsi prio type"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, + OID_AUTO, "dscp_mode", CTLFLAG_RD | CTLTYPE_STRING, + vsi, IRDMA_QOS_DSCP_MODE, irdma_sysctl_qos, "A", + "show dscp_mode"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, OID_AUTO, + "dscp_map", CTLFLAG_RD | CTLTYPE_STRING, vsi, + IRDMA_QOS_DSCP_MAP, irdma_sysctl_qos, "A", + "show dscp map"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, OID_AUTO, + "up2tc", CTLFLAG_RD | CTLTYPE_STRING, vsi, + IRDMA_QOS_UP2TC, irdma_sysctl_qos, "A", + "up to tc mapping"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, OID_AUTO, + 
"qs", CTLFLAG_RD | CTLTYPE_STRING, vsi, + IRDMA_QOS_QS_HANDLE, irdma_sysctl_qos, "A", + "qs_handle"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, OID_AUTO, + "tc", CTLFLAG_RD | CTLTYPE_STRING, vsi, IRDMA_QOS_TC, + irdma_sysctl_qos, "A", "tc list"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, OID_AUTO, + "rel_bw", CTLFLAG_RD | CTLTYPE_STRING, vsi, + IRDMA_QOS_REL_BW, irdma_sysctl_qos, "A", "relative bw"); + SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, qos_oid_list, OID_AUTO, + "prio_type", CTLFLAG_RD | CTLTYPE_STRING, vsi, + IRDMA_QOS_PRIO_TYPE, irdma_sysctl_qos, "A", + "prio_type"); } void diff --git a/sys/dev/irdma/fbsd_kcompat.h b/sys/dev/irdma/fbsd_kcompat.h index 064963bb93bd..cc51f1e7933a 100644 --- a/sys/dev/irdma/fbsd_kcompat.h +++ b/sys/dev/irdma/fbsd_kcompat.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2023 Intel Corporation + * Copyright (c) 2021 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -35,6 +35,7 @@ #ifndef FBSD_KCOMPAT_H #define FBSD_KCOMPAT_H #include "ice_rdma.h" +#include "irdma-abi.h" #define TASKLET_DATA_TYPE unsigned long #define TASKLET_FUNC_TYPE void (*)(TASKLET_DATA_TYPE) @@ -77,9 +78,13 @@ void kc_set_roce_uverbs_cmd_mask(struct irdma_device *iwdev); void kc_set_rdma_uverbs_cmd_mask(struct irdma_device *iwdev); +extern u8 irdma_sysctl_max_ord; +extern u8 irdma_sysctl_max_ird; + struct irdma_tunable_info { struct sysctl_ctx_list irdma_sysctl_ctx; struct sysctl_oid *irdma_sysctl_tree; + struct sysctl_oid *qos_sysctl_tree; struct sysctl_oid *sws_sysctl_tree; char drv_ver[IRDMA_VER_LEN]; u8 roce_ena; @@ -142,7 +147,7 @@ void irdma_destroy_ah(struct ib_ah *ibah, u32 flags); void irdma_destroy_ah_stub(struct ib_ah *ibah, u32 flags); int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); -int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u8 *speed, u8 *width); +int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width); enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, u8 port_num); int irdma_roce_port_immutable(struct ib_device *ibdev, u8 port_num, @@ -190,6 +195,7 @@ int irdma_addr_resolve_neigh_ipv6(struct irdma_cm_node *cm_node, u32 *dest, int arpindex); void irdma_dcqcn_tunables_init(struct irdma_pci_f *rf); void irdma_sysctl_settings(struct irdma_pci_f *rf); +void irdma_qos_info_tunables_init(struct irdma_pci_f *rf); void irdma_sw_stats_tunables_init(struct irdma_pci_f *rf); u32 irdma_create_stag(struct irdma_device *iwdev); void irdma_free_stag(struct irdma_device *iwdev, u32 stag); @@ -201,6 +207,15 @@ int irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 len, struct irdma_mr; struct irdma_cq; struct irdma_cq_buf; +int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, + struct ib_udata *udata, + struct irdma_mr *iwmr); +int 
irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req, + struct ib_udata *udata, + struct irdma_mr *iwmr); +struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, + u64 virt, int access, + struct ib_udata *udata); struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, diff --git a/sys/dev/irdma/ice_devids.h b/sys/dev/irdma/ice_devids.h index 57a7f2f7c2af..0cf7aa6aee22 100644 --- a/sys/dev/irdma/ice_devids.h +++ b/sys/dev/irdma/ice_devids.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2019 - 2020 Intel Corporation + * Copyright (c) 2019 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -88,4 +88,28 @@ #define ICE_DEV_ID_E822L_10G_BASE_T 0x1899 /* Intel(R) Ethernet Connection E822-L 1GbE */ #define ICE_DEV_ID_E822L_SGMII 0x189A +/* Intel(R) Ethernet Controller E830-CC for backplane */ +#define ICE_DEV_ID_E830_BACKPLANE 0x12D1 +/* Intel(R) Ethernet Controller E830-CC for QSFP */ +#define ICE_DEV_ID_E830_QSFP56 0x12D2 +/* Intel(R) Ethernet Controller E830-CC for SFP */ +#define ICE_DEV_ID_E830_SFP 0x12D3 +/* Intel(R) Ethernet Controller E830-CC for SFP-DD */ +#define ICE_DEV_ID_E830_SFP_DD 0x12D4 +/* Intel(R) Ethernet Controller E830-C for backplane */ +#define ICE_DEV_ID_E830C_BACKPLANE 0x12D5 +/* Intel(R) Ethernet Controller E830-XXV for backplane */ +#define ICE_DEV_ID_E830_XXV_BACKPLANE 0x12DC +/* Intel(R) Ethernet Controller E830-C for QSFP */ +#define ICE_DEV_ID_E830C_QSFP 0x12D8 +/* Intel(R) Ethernet Controller E830-XXV for QSFP */ +#define ICE_DEV_ID_E830_XXV_QSFP 0x12DD +/* Intel(R) Ethernet Controller E830-C for SFP */ +#define ICE_DEV_ID_E830C_SFP 0x12DA +/* Intel(R) Ethernet Controller E830-XXV for SFP */ +#define ICE_DEV_ID_E830_XXV_SFP 0x12DE +/* Intel(R) 
Ethernet Controller E835-XXV for SFP */ +#define ICE_DEV_ID_E835_XXV_SFP 0x124A +/* Intel(R) Ethernet Controller E835-CC for QSFP */ +#define ICE_DEV_ID_E835_QSFP 0x1249 #endif /* ICE_DEVIDS_H */ diff --git a/sys/dev/irdma/icrdma.c b/sys/dev/irdma/icrdma.c index a4f3904a820c..aeb07addcff7 100644 --- a/sys/dev/irdma/icrdma.c +++ b/sys/dev/irdma/icrdma.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2025 Intel Corporation + * Copyright (c) 2021 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -52,7 +52,7 @@ /** * Driver version */ -char irdma_driver_version[] = "1.2.37-k"; +char irdma_driver_version[] = "1.3.56-k"; /** * irdma_init_tunable - prepare tunables @@ -76,6 +76,10 @@ irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id) irdma_oid_list = SYSCTL_CHILDREN(t_info->irdma_sysctl_tree); + t_info->qos_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx, + irdma_oid_list, OID_AUTO, + "qos", CTLFLAG_RD, + NULL, ""); t_info->sws_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, "sw_stats", CTLFLAG_RD, @@ -418,6 +422,10 @@ irdma_finalize_task(void *context, int pending) "Starting deferred closing %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); atomic_dec(&rf->dev_ctx.event_rfcnt); + if (rf->rdma_ver == IRDMA_GEN_2 && !rf->ftype) { + cancel_delayed_work_sync(&iwdev->rf->dwork_cqp_poll); + irdma_free_stag(iwdev->rf->iwdev, iwdev->rf->chk_stag); + } wait_event_timeout(iwdev->suspend_wq, !atomic_read(&rf->dev_ctx.event_rfcnt), IRDMA_MAX_TIMEOUT); @@ -441,7 +449,10 @@ irdma_finalize_task(void *context, int pending) if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; - l2params.mtu = peer->mtu; +#define IRDMA_MIN_MTU_HEADERS IB_GRH_BYTES + IB_BTH_BYTES + 28 + l2params.mtu = (peer->mtu) ? 
peer->mtu : + ib_mtu_enum_to_int(IB_MTU_256) + + IRDMA_MIN_MTU_HEADERS; status = irdma_rt_init_hw(iwdev, &l2params); if (status) { irdma_pr_err("RT init failed %d\n", status); @@ -454,12 +465,21 @@ irdma_finalize_task(void *context, int pending) irdma_rt_deinit_hw(iwdev); ib_dealloc_device(&iwdev->ibdev); } + irdma_qos_info_tunables_init(rf); irdma_sw_stats_tunables_init(rf); req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = true; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_reg_ipaddr_event_cb(rf); atomic_inc(&rf->dev_ctx.event_rfcnt); + if (rf->rdma_ver == IRDMA_GEN_2 && !rf->ftype) { + INIT_DELAYED_WORK(&rf->dwork_cqp_poll, cqp_poll_worker); + rf->chk_stag = irdma_create_stag(rf->iwdev); + rf->used_mrs++; + mod_delayed_work(iwdev->cleanup_wq, &rf->dwork_cqp_poll, + msecs_to_jiffies(5000)); + } + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Deferred opening finished %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); @@ -562,8 +582,9 @@ irdma_probe(struct ice_rdma_peer *peer) struct irdma_handler *hdl; int err = 0; - irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p, peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n", - irdma_driver_version, peer, peer->pf_id, peer->ifp, + irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p\n", + irdma_driver_version, peer, peer->pf_id, peer->ifp); + irdma_pr_info("peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n", if_getdunit(peer->ifp), (void *)(uintptr_t)peer->pci_mem->r_bustag); hdl = irdma_find_handler(peer); @@ -664,6 +685,7 @@ irdma_remove(struct ice_rdma_peer *peer) sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx); hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL; + hdl->iwdev->rf->tun_info.qos_sysctl_tree = NULL; hdl->iwdev->rf->tun_info.sws_sysctl_tree = NULL; irdma_ctrl_deinit_hw(iwdev->rf); diff --git a/sys/dev/irdma/icrdma_hw.c b/sys/dev/irdma/icrdma_hw.c index a046bf18a616..b3ac9ea0f3de 100644 --- a/sys/dev/irdma/icrdma_hw.c +++ 
b/sys/dev/irdma/icrdma_hw.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2023 Intel Corporation + * Copyright (c) 2017 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,6 +35,7 @@ #include "osdep.h" #include "irdma_type.h" #include "icrdma_hw.h" +#include "irdma_main.h" void disable_prefetch(struct irdma_hw *hw); @@ -244,11 +245,12 @@ icrdma_init_hw(struct irdma_sc_dev *dev) } void -irdma_init_config_check(struct irdma_config_check *cc, u8 traffic_class, u16 qs_handle) +irdma_init_config_check(struct irdma_config_check *cc, u8 traffic_class, u8 prio, u16 qs_handle) { cc->config_ok = false; cc->traffic_class = traffic_class; cc->qs_handle = qs_handle; + cc->prio = prio; cc->lfc_set = 0; cc->pfc_set = 0; } @@ -256,16 +258,27 @@ irdma_init_config_check(struct irdma_config_check *cc, u8 traffic_class, u16 qs_ static bool irdma_is_lfc_set(struct irdma_config_check *cc, struct irdma_sc_vsi *vsi) { + u32 temp; u32 lfc = 1; + u32 rx_pause_enable, tx_pause_enable; u8 fn_id = vsi->dev->hmc_fn_id; - lfc &= (rd32(vsi->dev->hw, - PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0 + 4 * fn_id) >> 8); - lfc &= (rd32(vsi->dev->hw, - PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0 + 4 * fn_id) >> 8); + if (irdma_fw_major_ver(vsi->dev) == 1) { + rx_pause_enable = PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0; + tx_pause_enable = PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0; + } else { + rx_pause_enable = CNV_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0; + tx_pause_enable = CNV_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0; + } + +#define LFC_ENABLE BIT_ULL(8) +#define LFC_ENABLE_S 8 + temp = rd32(vsi->dev->hw, rx_pause_enable + 4 * fn_id); + lfc &= FIELD_GET(LFC_ENABLE, temp); + temp = rd32(vsi->dev->hw, tx_pause_enable + 4 * fn_id); + lfc &= FIELD_GET(LFC_ENABLE, temp); lfc &= rd32(vsi->dev->hw, PRTMAC_HSEC_CTL_RX_ENABLE_GPP_0 + 4 * vsi->dev->hmc_fn_id); - if (lfc) return true; return 
false; @@ -290,14 +303,21 @@ static bool irdma_is_pfc_set(struct irdma_config_check *cc, struct irdma_sc_vsi *vsi) { u32 pause; + u32 rx_pause_enable, tx_pause_enable; u8 fn_id = vsi->dev->hmc_fn_id; - pause = (rd32(vsi->dev->hw, - PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0 + 4 * fn_id) >> - cc->traffic_class) & BIT(0); - pause &= (rd32(vsi->dev->hw, - PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0 + 4 * fn_id) >> - cc->traffic_class) & BIT(0); + if (irdma_fw_major_ver(vsi->dev) == 1) { + rx_pause_enable = PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0; + tx_pause_enable = PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0; + } else { + rx_pause_enable = CNV_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0; + tx_pause_enable = CNV_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0; + } + + pause = (rd32(vsi->dev->hw, rx_pause_enable + 4 * fn_id) >> + cc->prio) & BIT(0); + pause &= (rd32(vsi->dev->hw, tx_pause_enable + 4 * fn_id) >> + cc->prio) & BIT(0); return irdma_check_tc_has_pfc(vsi, GLDCB_TC2PFC, cc->traffic_class) && pause; @@ -314,17 +334,18 @@ irdma_is_config_ok(struct irdma_config_check *cc, struct irdma_sc_vsi *vsi) return cc->config_ok; } -#define IRDMA_RCV_WND_NO_FC 65536 -#define IRDMA_RCV_WND_FC 65536 +#define IRDMA_RCV_WND_NO_FC 0x1FFFC +#define IRDMA_RCV_WND_FC 0x3FFFC -#define IRDMA_CWND_NO_FC 0x1 -#define IRDMA_CWND_FC 0x18 +#define IRDMA_CWND_NO_FC 0x20 +#define IRDMA_CWND_FC 0x400 +#define IRDMA_CWND_DCQCN_FC 0x80000 #define IRDMA_RTOMIN_NO_FC 0x5 #define IRDMA_RTOMIN_FC 0x32 #define IRDMA_ACKCREDS_NO_FC 0x02 -#define IRDMA_ACKCREDS_FC 0x06 +#define IRDMA_ACKCREDS_FC 0x1E static void irdma_check_flow_ctrl(struct irdma_sc_vsi *vsi, u8 user_prio, u8 traffic_class) @@ -372,7 +393,7 @@ irdma_check_fc_for_qp(struct irdma_sc_vsi *vsi, struct irdma_sc_qp *sc_qp) struct irdma_config_check *cfg_chk = &vsi->cfg_check[i]; irdma_init_config_check(cfg_chk, - vsi->qos[i].traffic_class, + vsi->qos[i].traffic_class, i, vsi->qos[i].qs_handle); if (sc_qp->qs_handle == cfg_chk->qs_handle) irdma_check_flow_ctrl(vsi, i, cfg_chk->traffic_class); 
diff --git a/sys/dev/irdma/icrdma_hw.h b/sys/dev/irdma/icrdma_hw.h index b413b478538a..0b48c69ba4ee 100644 --- a/sys/dev/irdma/icrdma_hw.h +++ b/sys/dev/irdma/icrdma_hw.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2023 Intel Corporation + * Copyright (c) 2017 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -83,6 +83,16 @@ #define PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_2 0x001e31a8 #define PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_3 0x001e31aC +#define CNV_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_0 0x001e2180 +#define CNV_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_1 0x001e2184 +#define CNV_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_2 0x001e2188 +#define CNV_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_3 0x001e218c + +#define CNV_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_0 0x001e21a0 +#define CNV_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_1 0x001e21a4 +#define CNV_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_2 0x001e21a8 +#define CVN_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_3 0x001e21ac + #define PRTMAC_HSEC_CTL_RX_ENABLE_GPP_0 0x001e34c0 #define PRTMAC_HSEC_CTL_RX_ENABLE_GPP_1 0x001e34c4 #define PRTMAC_HSEC_CTL_RX_ENABLE_GPP_2 0x001e34c8 @@ -120,8 +130,8 @@ enum icrdma_device_caps_const { ICRDMA_MAX_SGE_RD = 13, ICRDMA_MAX_STATS_COUNT = 128, - ICRDMA_MAX_IRD_SIZE = 32, - ICRDMA_MAX_ORD_SIZE = 32, + ICRDMA_MAX_IRD_SIZE = 8, + ICRDMA_MAX_ORD_SIZE = 8, ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */, ICRDMA_MAX_PUSH_PAGE_COUNT = 256, @@ -130,6 +140,7 @@ enum icrdma_device_caps_const { void icrdma_init_hw(struct irdma_sc_dev *dev); void irdma_init_config_check(struct irdma_config_check *cc, u8 traffic_class, + u8 prio, u16 qs_handle); bool irdma_is_config_ok(struct irdma_config_check *cc, struct irdma_sc_vsi *vsi); void irdma_check_fc_for_tc_update(struct irdma_sc_vsi *vsi, diff --git a/sys/dev/irdma/irdma.h b/sys/dev/irdma/irdma.h index e6e493f1854a..b745626b8b60 100644 --- a/sys/dev/irdma/irdma.h +++ 
b/sys/dev/irdma/irdma.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2022 Intel Corporation + * Copyright (c) 2017 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -99,6 +99,7 @@ #define IRDMA_PFHMC_SDCMD_PMSDPARTSEL BIT(15) #define IRDMA_INVALID_CQ_IDX 0xffffffff +#define IRDMA_Q_INVALID_IDX 0xffff enum irdma_dyn_idx_t { IRDMA_IDX_ITR0 = 0, @@ -195,6 +196,7 @@ struct irdma_uk_attrs { u32 max_hw_wq_quanta; u32 min_hw_cq_size; u32 max_hw_cq_size; + u16 max_hw_push_len; u16 max_hw_sq_chunk; u16 min_hw_wq_size; u8 hw_rev; diff --git a/sys/dev/irdma/irdma_cm.c b/sys/dev/irdma/irdma_cm.c index f3ca761b32f6..669d2cf5cb10 100644 --- a/sys/dev/irdma/irdma_cm.c +++ b/sys/dev/irdma/irdma_cm.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2025 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,18 +38,110 @@ static void irdma_cm_post_event(struct irdma_cm_event *event); static void irdma_disconnect_worker(struct work_struct *work); /** - * irdma_free_sqbuf - put back puda buffer if refcount is 0 + * irdma_cm_node_cmp_state - Compare the state of a CM node + * @cm_node: Pointer to the CM node structure + * @state: The state to compare against + * + * This function checks if the current state of the given CM node matches + * the specified state. + * + * Return: true if the states match, false otherwise. 
+ */ +static bool +irdma_cm_node_cmp_state(struct irdma_cm_node *cm_node, + enum irdma_cm_node_state state) +{ + + return cm_node->state == state; +} + +/** + * irdma_cm_node_set_state - Set the state of a CM node + * @cm_node: Pointer to the CM node whose state is to be updated + * @state: The new state to set for the CM node + * + * This function updates the state of the specified CM node to the + * provided state and returns the previous state of the CM node. + * + * Return: The previous state of the CM node. + */ +static enum irdma_cm_node_state +irdma_cm_node_set_state(struct irdma_cm_node *cm_node, + enum irdma_cm_node_state state) +{ + enum irdma_cm_node_state old_state; + + old_state = cm_node->state; + cm_node->state = state; + return old_state; +} + +/** + * irdma_rem_ref_sqbuf - put back puda buffer if refcount is 0 + * @vsi: The VSI structure of the device + * @buf: puda buffer to free + */ +static int +irdma_rem_ref_sqbuf(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf) +{ + struct irdma_puda_rsrc *ilq = vsi->ilq; + struct irdma_cm_node *cm_node = buf->scratch; + struct irdma_cm_core *cm_core; + + if (!atomic_dec_and_test(&buf->pb_refcount)) + return 0; + + irdma_puda_ret_bufpool(ilq, buf); + + if (cm_node) { + buf->scratch = NULL; + cm_core = cm_node->cm_core; + cm_core->cm_free_ah(cm_node); + } + + return 1; +} + +/** + * irdma_cm_ilq_cmpl_handler - callback function when ILQ completes a send * @vsi: The VSI structure of the device - * @bufp: puda buffer to free + * @bufp: puda buffer structure from sent packet */ void -irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp) +irdma_cm_ilq_cmpl_handler(struct irdma_sc_vsi *vsi, void *bufp) { struct irdma_puda_buf *buf = bufp; - struct irdma_puda_rsrc *ilq = vsi->ilq; - if (atomic_dec_and_test(&buf->refcount)) - irdma_puda_ret_bufpool(ilq, buf); + irdma_rem_ref_sqbuf(vsi, buf); +} + +/** + * irdma_cm_send_buf - Sends a buffer using the PUDA ILQ + * @ilq: Pointer to the PUDA (Protocol Unit Data 
Agent) resource structure + * @buf: Pointer to the PUDA buffer to be sent + * + * This function is responsible for transmitting a buffer through the + * specified PUDA resource. It is typically used in the context of + * managing RDMA connections and their associated data transfers. + * + * Return: 0 on success, or a negative error code on failure. + */ +static int +irdma_cm_send_buf( + struct irdma_puda_rsrc *ilq, + struct irdma_puda_buf *buf +) +{ + int ret; + + if (!atomic_inc_not_zero(&buf->pb_refcount)) + pr_err("irdma: puda buffer refcnt increase from zero\n"); + + ret = irdma_puda_send_buf(ilq, buf); + if (ret) + irdma_rem_ref_sqbuf(ilq->vsi, buf); + + return ret; } /** @@ -255,7 +347,7 @@ irdma_timer_list_prep(struct irdma_cm_core *cm_core, HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if ((cm_node->close_entry || cm_node->send_entry) && - atomic_inc_not_zero(&cm_node->refcnt)) + irdma_add_ref_cmnode(cm_node)) list_add(&cm_node->timer_entry, timer_list); } } @@ -304,17 +396,16 @@ irdma_create_event(struct irdma_cm_node *cm_node, static void irdma_free_retrans_entry(struct irdma_cm_node *cm_node) { - struct irdma_device *iwdev = cm_node->iwdev; struct irdma_timer_entry *send_entry; send_entry = cm_node->send_entry; + cm_node->send_entry = NULL; if (!send_entry) return; - cm_node->send_entry = NULL; - irdma_free_sqbuf(&iwdev->vsi, send_entry->sqbuf); + irdma_rem_ref_sqbuf(&cm_node->iwdev->vsi, send_entry->sqbuf); kfree(send_entry); - atomic_dec(&cm_node->refcnt); + irdma_rem_ref_cmnode(cm_node); } /** @@ -367,6 +458,7 @@ irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node, } sqbuf->ah_id = cm_node->ah->ah_info.ah_idx; + sqbuf->ah = cm_node->ah; buf = sqbuf->mem.va; if (options) opts_len = (u32)options->size; @@ -433,7 +525,7 @@ irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node, if (pdata && pdata->addr) memcpy(buf, pdata->addr, pdata->size); - atomic_set(&sqbuf->refcount, 1); + atomic_set(&sqbuf->pb_refcount, 1); irdma_debug_buf(vsi->dev, 
IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER", sqbuf->mem.va, sqbuf->totallen); @@ -620,7 +712,7 @@ irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node, if (pdata && pdata->addr) memcpy(buf, pdata->addr, pdata->size); - atomic_set(&sqbuf->refcount, 1); + atomic_set(&sqbuf->pb_refcount, 1); irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER", sqbuf->mem.va, sqbuf->totallen); @@ -667,11 +759,12 @@ irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset) if (reset) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p state=%d\n", cm_node, cm_node->state); - atomic_inc(&cm_node->refcnt); - irdma_send_reset(cm_node); + irdma_add_ref_cmnode(cm_node); + if (irdma_send_reset(cm_node)) + irdma_rem_ref_cmnode(cm_node); } - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); } @@ -685,13 +778,13 @@ irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset) { irdma_cleanup_retrans_entry(cm_node); cm_node->cm_core->stats_passive_errs++; - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p state=%d\n", cm_node, cm_node->state); if (reset) irdma_send_reset(cm_node); else - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } /** @@ -717,7 +810,7 @@ irdma_event_connect_error(struct irdma_cm_event *event) cm_id->provider_data = NULL; irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); - irdma_rem_ref_cm_node(event->cm_node); + irdma_rem_ref_cmnode(event->cm_node); } /** @@ -989,7 +1082,7 @@ irdma_send_mpa_reject(struct irdma_cm_node *cm_node, if (!sqbuf) return -ENOMEM; - cm_node->state = IRDMA_CM_STATE_FIN_WAIT1; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_FIN_WAIT1); return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); @@ -1028,7 +1121,7 @@ 
irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node, goto negotiate_done; } - if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) { + if (!irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_MPAREQ_SENT)) { /* responder */ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ)) cm_node->ird_size = 1; @@ -1108,7 +1201,7 @@ irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type, } cm_node->mpa_frame_rev = mpa_frame->rev; - if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) { + if (!irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_MPAREQ_SENT)) { if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, @@ -1170,7 +1263,7 @@ irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type, * @close_when_complete: is cm_node to be removed * * note - cm_node needs to be protected before calling this. Encase in: - * irdma_rem_ref_cm_node(cm_core, cm_node); + * irdma_rem_ref_cmnode(cm_core, cm_node); * irdma_schedule_cm_timer(...) 
* atomic_inc(&cm_node->refcnt); */ @@ -1189,7 +1282,7 @@ irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) { if (type != IRDMA_TIMER_TYPE_CLOSE) - irdma_free_sqbuf(vsi, sqbuf); + irdma_rem_ref_sqbuf(vsi, sqbuf); return -ENOMEM; } @@ -1204,6 +1297,7 @@ irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, if (type == IRDMA_TIMER_TYPE_CLOSE) { new_send->timetosend += (HZ / 10); if (cm_node->close_entry) { + irdma_rem_ref_sqbuf(vsi, sqbuf); kfree(new_send); irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "already close entry\n"); @@ -1213,17 +1307,29 @@ irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, cm_node->close_entry = new_send; } else { /* type == IRDMA_TIMER_TYPE_SEND */ spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + if (cm_node->send_entry) { + spin_unlock_irqrestore(&cm_node->retrans_list_lock, + flags); + irdma_rem_ref_sqbuf(vsi, sqbuf); + kfree(new_send); + + return -EINVAL; + } cm_node->send_entry = new_send; - atomic_inc(&cm_node->refcnt); + irdma_add_ref_cmnode(cm_node); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT; - atomic_inc(&sqbuf->refcount); - irdma_puda_send_buf(vsi->ilq, sqbuf); + if (sqbuf->ah) + atomic_inc(&sqbuf->ah->ah_info.ah_refcnt); + + if (irdma_cm_send_buf(vsi->ilq, new_send->sqbuf)) + cm_core->cm_free_ah(cm_node); + if (!send_retrans) { irdma_cleanup_retrans_entry(cm_node); if (close_when_complete) - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); return 0; } } @@ -1247,21 +1353,22 @@ irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, static void irdma_retrans_expired(struct irdma_cm_node *cm_node) { - enum irdma_cm_node_state state = cm_node->state; + enum irdma_cm_node_state state; - cm_node->state = IRDMA_CM_STATE_CLOSED; + state = irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); switch (state) { case IRDMA_CM_STATE_SYN_RCVD: case 
IRDMA_CM_STATE_CLOSING: - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_LAST_ACK: irdma_send_reset(cm_node); break; default: - atomic_inc(&cm_node->refcnt); - irdma_send_reset(cm_node); + irdma_add_ref_cmnode(cm_node); + if (irdma_send_reset(cm_node)) + irdma_rem_ref_cmnode(cm_node); irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); break; } @@ -1297,7 +1404,7 @@ irdma_handle_close_entry(struct irdma_cm_node *cm_node, } } else if (rem_node) { /* TIME_WAIT state */ - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } kfree(close_entry); @@ -1352,7 +1459,7 @@ irdma_cm_timer_tick(struct timer_list *t) if (!send_entry) goto done; if (time_after(send_entry->timetosend, jiffies)) { - if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) { + if (!irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED)) { if (nexttimeout > send_entry->timetosend || !settimer) { nexttimeout = send_entry->timetosend; @@ -1364,8 +1471,8 @@ irdma_cm_timer_tick(struct timer_list *t) goto done; } - if (cm_node->state == IRDMA_CM_STATE_OFFLOADED || - cm_node->state == IRDMA_CM_STATE_CLOSED) { + if (irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED) || + irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_CLOSED)) { irdma_free_retrans_entry(cm_node); goto done; } @@ -1376,7 +1483,7 @@ irdma_cm_timer_tick(struct timer_list *t) spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); irdma_retrans_expired(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); goto done; } @@ -1384,18 +1491,20 @@ irdma_cm_timer_tick(struct timer_list *t) vsi = &cm_node->iwdev->vsi; if (!cm_node->ack_rcvd) { - atomic_inc(&send_entry->sqbuf->refcount); - irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf); + if (send_entry->sqbuf->ah) + atomic_inc(&send_entry->sqbuf->ah->ah_info.ah_refcnt); + if 
(irdma_cm_send_buf(vsi->ilq, send_entry->sqbuf)) + cm_core->cm_free_ah(cm_node); + cm_node->cm_core->stats_pkt_retrans++; } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (send_entry->send_retrans) { send_entry->retranscount--; - timetosend = (IRDMA_RETRY_TIMEOUT << - (IRDMA_DEFAULT_RETRANS - - send_entry->retranscount)); - + timetosend = IRDMA_RETRY_TIMEOUT << + min(IRDMA_DEFAULT_RETRANS - + send_entry->retranscount, (u32)4); send_entry->timetosend = jiffies + min(timetosend, IRDMA_MAX_TIMEOUT); if (nexttimeout > send_entry->timetosend || !settimer) { @@ -1408,11 +1517,11 @@ irdma_cm_timer_tick(struct timer_list *t) close_when_complete = send_entry->close_when_complete; irdma_free_retrans_entry(cm_node); if (close_when_complete) - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } done: spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } if (settimer) { @@ -1489,8 +1598,15 @@ irdma_send_ack(struct irdma_cm_node *cm_node) sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK); - if (sqbuf) - irdma_puda_send_buf(vsi->ilq, sqbuf); + if (sqbuf) { + if (sqbuf->ah) + atomic_inc(&sqbuf->ah->ah_info.ah_refcnt); + + if (irdma_cm_send_buf(vsi->ilq, sqbuf)) + cm_node->cm_core->cm_free_ah(cm_node); + + irdma_rem_ref_sqbuf(vsi, sqbuf); + } } /** @@ -1665,7 +1781,6 @@ u16 irdma_get_vlan_ipv4(struct iw_cm_id *cm_id, u32 *addr) { u16 vlan_id = 0xFFFF; - #ifdef INET if_t netdev; struct vnet *vnet = &init_net; @@ -1831,7 +1946,7 @@ irdma_reset_list_prep(struct irdma_cm_core *cm_core, HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if (cm_node->listener == listener && !cm_node->accelerated && - atomic_inc_not_zero(&cm_node->refcnt)) + irdma_add_ref_cmnode(cm_node)) list_add(&cm_node->reset_entry, reset_list); } } @@ -1869,21 +1984,20 @@ irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core, cm_node = container_of(list_pos, struct irdma_cm_node, 
reset_entry); if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) { - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); continue; } irdma_cleanup_retrans_entry(cm_node); err = irdma_send_reset(cm_node); if (err) { - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "send reset failed\n"); } else { - old_state = cm_node->state; - cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED; + old_state = irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_LISTENER_DESTROYED); if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD) - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } } @@ -1969,7 +2083,7 @@ irdma_find_node(struct irdma_cm_core *cm_core, cm_node->loc_port == loc_port && cm_node->rem_port == rem_port && !memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) && !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr))) { - if (!atomic_inc_not_zero(&cm_node->refcnt)) + if (!irdma_add_ref_cmnode(cm_node)) goto exit; rcu_read_unlock(); return cm_node; @@ -2078,10 +2192,24 @@ irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait) &cm_node->ah)) return -ENOMEM; + atomic_set(&cm_node->ah->ah_info.ah_refcnt, 1); + return 0; } /** + * irdma_cm_free_ah_worker - async free a cm address handle + * @work: pointer to ah structure + */ +static void +irdma_cm_free_ah_worker(struct work_struct *work) +{ + struct irdma_sc_ah *ah = container_of(work, struct irdma_sc_ah, ah_free_work); + + irdma_puda_free_ah(ah->dev, ah); +} + +/** * irdma_cm_free_ah - free a cm address handle * @cm_node: The connection manager node to create AH for */ @@ -2090,8 +2218,14 @@ irdma_cm_free_ah(struct irdma_cm_node *cm_node) { struct irdma_device *iwdev = cm_node->iwdev; - irdma_puda_free_ah(&iwdev->rf->sc_dev, cm_node->ah); - cm_node->ah = NULL; + if (cm_node->ah) { + if (!atomic_dec_and_test(&cm_node->ah->ah_info.ah_refcnt)) + return; + + 
INIT_WORK(&cm_node->ah->ah_free_work, irdma_cm_free_ah_worker); + queue_work(iwdev->cleanup_wq, &cm_node->ah->ah_free_work); + cm_node->ah = NULL; + } } /** @@ -2109,11 +2243,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct irdma_cm_node *cm_node; int arpindex; if_t netdev = iwdev->netdev; + int ret; /* create an hte and cm_node for this instance */ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); if (!cm_node) - return NULL; + return ERR_PTR(-ENOMEM); /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; @@ -2170,8 +2305,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, kc_set_loc_seq_num_mss(cm_node); arpindex = irdma_resolve_neigh_lpb_chk(iwdev, cm_node, cm_info); - if (arpindex < 0) + if (arpindex < 0) { + ret = -EINVAL; goto err; + } ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr); irdma_add_hte_node(cm_core, cm_node); @@ -2181,7 +2318,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, err: kfree(cm_node); - return NULL; + return ERR_PTR(ret); } static void @@ -2197,6 +2334,9 @@ irdma_destroy_connection(struct irdma_cm_node *cm_node) "node destroyed before established\n"); atomic_dec(&cm_node->listener->pend_accepts_cnt); } + + if (cm_node->send_entry) + irdma_cleanup_retrans_entry(cm_node); if (cm_node->close_entry) irdma_handle_close_entry(cm_node, 0); if (cm_node->listener) { @@ -2237,11 +2377,28 @@ irdma_destroy_connection(struct irdma_cm_node *cm_node) } /** - * irdma_rem_ref_cm_node - destroy an instance of a cm node + * irdma_add_ref_cmnode - add reference to an instance of a cm node + * @cm_node: connection's node + */ +bool +irdma_add_ref_cmnode(struct irdma_cm_node *cm_node) +{ + if (atomic_inc_not_zero(&cm_node->refcnt)) + return true; + + /* + * Trying to add refcount to a cmnode being destroyed. 
+ */ + + return false; +} + +/** + * irdma_rem_ref_cmnode - destroy an instance of a cm node * @cm_node: connection's node */ void -irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) +irdma_rem_ref_cmnode(struct irdma_cm_node *cm_node) { struct irdma_cm_core *cm_core = cm_node->cm_core; unsigned long flags; @@ -2280,21 +2437,23 @@ irdma_handle_fin_pkt(struct irdma_cm_node *cm_node) case IRDMA_CM_STATE_MPAREJ_RCVD: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_LAST_ACK; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_LAST_ACK); irdma_send_fin(cm_node); break; case IRDMA_CM_STATE_MPAREQ_SENT: irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSED; - atomic_inc(&cm_node->refcnt); - irdma_send_reset(cm_node); + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); + irdma_add_ref_cmnode(cm_node); + if (irdma_send_reset(cm_node)) + irdma_rem_ref_cmnode(cm_node); + break; case IRDMA_CM_STATE_FIN_WAIT1: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSING; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSING); irdma_send_ack(cm_node); /* * Wait for ACK as this is simultaneous close. After we receive ACK, do not send anything. 
Just rm the @@ -2304,7 +2463,7 @@ irdma_handle_fin_pkt(struct irdma_cm_node *cm_node) case IRDMA_CM_STATE_FIN_WAIT2: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_TIME_WAIT; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_TIME_WAIT); irdma_send_ack(cm_node); irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE, 1, 0); @@ -2312,8 +2471,8 @@ irdma_handle_fin_pkt(struct irdma_cm_node *cm_node) case IRDMA_CM_STATE_TIME_WAIT: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSED; - irdma_rem_ref_cm_node(cm_node); + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: default: @@ -2347,7 +2506,7 @@ irdma_handle_rst_pkt(struct irdma_cm_node *cm_node, /* Drop down to MPA_V1 */ cm_node->mpa_frame_rev = IETF_MPA_V1; /* send a syn and goto syn sent state */ - cm_node->state = IRDMA_CM_STATE_SYN_SENT; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_SYN_SENT); if (irdma_send_syn(cm_node, 0)) irdma_active_open_err(cm_node, false); break; @@ -2374,8 +2533,8 @@ irdma_handle_rst_pkt(struct irdma_cm_node *cm_node, case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_TIME_WAIT: - cm_node->state = IRDMA_CM_STATE_CLOSED; - irdma_rem_ref_cm_node(cm_node); + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); + irdma_rem_ref_cmnode(cm_node); break; default: break; @@ -2400,7 +2559,7 @@ irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node, err = irdma_parse_mpa(cm_node, dataloc, &res_type, datasize); if (err) { - if (cm_node->state == IRDMA_CM_STATE_MPAREQ_SENT) + if (irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_MPAREQ_SENT)) irdma_active_open_err(cm_node, true); else irdma_passive_open_err(cm_node, true); @@ -2412,7 +2571,7 @@ irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node, if (res_type == IRDMA_MPA_REQUEST_REJECT) irdma_debug(&cm_node->iwdev->rf->sc_dev, 
IRDMA_DEBUG_CM, "state for reject\n"); - cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_MPAREQ_RCVD); type = IRDMA_CM_EVENT_MPA_REQ; irdma_send_ack(cm_node); /* ACK received MPA request */ atomic_set(&cm_node->passive_state, @@ -2422,10 +2581,10 @@ irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node, irdma_cleanup_retrans_entry(cm_node); if (res_type == IRDMA_MPA_REQUEST_REJECT) { type = IRDMA_CM_EVENT_MPA_REJECT; - cm_node->state = IRDMA_CM_STATE_MPAREJ_RCVD; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_MPAREJ_RCVD); } else { type = IRDMA_CM_EVENT_CONNECTED; - cm_node->state = IRDMA_CM_STATE_OFFLOADED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_OFFLOADED); } irdma_send_ack(cm_node); break; @@ -2542,12 +2701,13 @@ irdma_handle_syn_pkt(struct irdma_cm_node *cm_node, cm_node->accept_pend = 1; atomic_inc(&cm_node->listener->pend_accepts_cnt); - cm_node->state = IRDMA_CM_STATE_SYN_RCVD; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_SYN_RCVD); break; case IRDMA_CM_STATE_CLOSED: irdma_cleanup_retrans_entry(cm_node); - atomic_inc(&cm_node->refcnt); - irdma_send_reset(cm_node); + irdma_add_ref_cmnode(cm_node); + if (irdma_send_reset(cm_node)) + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_ESTABLISHED: @@ -2605,7 +2765,7 @@ irdma_handle_synack_pkt(struct irdma_cm_node *cm_node, cm_node); break; } - cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_MPAREQ_SENT); break; case IRDMA_CM_STATE_MPAREQ_RCVD: irdma_passive_open_err(cm_node, true); @@ -2613,14 +2773,15 @@ irdma_handle_synack_pkt(struct irdma_cm_node *cm_node, case IRDMA_CM_STATE_LISTENING: cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSED: 
cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_cleanup_retrans_entry(cm_node); - atomic_inc(&cm_node->refcnt); - irdma_send_reset(cm_node); + irdma_add_ref_cmnode(cm_node); + if (irdma_send_reset(cm_node)) + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_FIN_WAIT1: @@ -2663,7 +2824,7 @@ irdma_handle_ack_pkt(struct irdma_cm_node *cm_node, if (ret) return ret; cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); - cm_node->state = IRDMA_CM_STATE_ESTABLISHED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_ESTABLISHED); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; irdma_handle_rcv_mpa(cm_node, rbuf); @@ -2688,23 +2849,24 @@ irdma_handle_ack_pkt(struct irdma_cm_node *cm_node, break; case IRDMA_CM_STATE_LISTENING: irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSED: irdma_cleanup_retrans_entry(cm_node); - atomic_inc(&cm_node->refcnt); - irdma_send_reset(cm_node); + irdma_add_ref_cmnode(cm_node); + if (irdma_send_reset(cm_node)) + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_CLOSING: irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_CLOSED; - irdma_rem_ref_cm_node(cm_node); + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: irdma_cleanup_retrans_entry(cm_node); - cm_node->state = IRDMA_CM_STATE_FIN_WAIT2; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_FIN_WAIT2); break; case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_FIN_WAIT2: @@ -2851,8 +3013,8 @@ irdma_create_cm_node(struct irdma_cm_core *cm_core, /* create a CM connection node */ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL); - if (!cm_node) - return -ENOMEM; + if (IS_ERR(cm_node)) + return PTR_ERR(cm_node); /* set our node side 
to client (active) side */ cm_node->tcp_cntxt.client = 1; @@ -2889,13 +3051,13 @@ irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata, passive_state = atomic_add_return(1, &cm_node->passive_state); if (passive_state == IRDMA_SEND_RESET_EVENT) { - cm_node->state = IRDMA_CM_STATE_CLOSED; - irdma_rem_ref_cm_node(cm_node); + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); + irdma_rem_ref_cmnode(cm_node); return 0; } - if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) { - irdma_rem_ref_cm_node(cm_node); + if (irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_LISTENER_DESTROYED)) { + irdma_rem_ref_cmnode(cm_node); return 0; } @@ -2903,7 +3065,7 @@ irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata, if (!ret) return 0; - cm_node->state = IRDMA_CM_STATE_CLOSED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_CLOSED); if (irdma_send_reset(cm_node)) irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "send reset failed\n"); @@ -2930,7 +3092,7 @@ irdma_cm_close(struct irdma_cm_node *cm_node) irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSE_WAIT: - cm_node->state = IRDMA_CM_STATE_LAST_ACK; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_LAST_ACK); irdma_send_fin(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: @@ -2948,13 +3110,13 @@ irdma_cm_close(struct irdma_cm_node *cm_node) case IRDMA_CM_STATE_INITED: case IRDMA_CM_STATE_CLOSED: case IRDMA_CM_STATE_LISTENER_DESTROYED: - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: if (cm_node->send_entry) irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "CM send_entry in OFFLOADED state\n"); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); break; } @@ -3052,28 +3214,29 @@ irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) cm_info.cm_id = listener->cm_id; cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info, listener); - if (!cm_node) { + if (IS_ERR(cm_node)) { 
irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "allocate node failed\n"); + "allocate node failed ret=%ld\n", + PTR_ERR(cm_node)); atomic_dec(&listener->refcnt); return; } if (!(tcp_get_flags(tcph) & (TH_RST | TH_FIN))) { - cm_node->state = IRDMA_CM_STATE_LISTENING; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_LISTENING); } else { - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); return; } - atomic_inc(&cm_node->refcnt); - } else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) { - irdma_rem_ref_cm_node(cm_node); + irdma_add_ref_cmnode(cm_node); + } else if (irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED)) { + irdma_rem_ref_cmnode(cm_node); return; } irdma_process_pkt(cm_node, rbuf); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } static int @@ -3248,7 +3411,7 @@ irdma_cm_init_tsa_conn(struct irdma_qp *iwqp, cm_node->lsmm_size; } - cm_node->state = IRDMA_CM_STATE_OFFLOADED; + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_OFFLOADED); iwqp->tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED; iwqp->tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx; @@ -3310,6 +3473,68 @@ irdma_qp_disconnect(struct irdma_qp *iwqp) irdma_cm_close(iwqp->cm_node); } +static void +dump_qp_ae_info(struct irdma_qp *iwqp) +{ + struct irdma_device *iwdev = iwqp->iwdev; + struct irdma_ae_info *ae_info = &iwdev->ae_info; + u16 ae = iwqp->last_aeq; + + if (!ae) + return; + + /* + * When there is a hard link disconnect reduce prints to avoid slowing down qp cleanup. 
+ */ + if (ae == IRDMA_AE_LLP_TOO_MANY_RETRIES) { + unsigned long flags; + u32 retry_cnt; + + spin_lock_irqsave(&ae_info->info_lock, flags); + ae_info->retry_cnt++; + if (time_after(ae_info->retry_delay, jiffies)) { + spin_unlock_irqrestore(&ae_info->info_lock, flags); + return; + } + + retry_cnt = ae_info->retry_cnt; + ae_info->retry_cnt = 0; + ae_info->retry_delay = jiffies + + msecs_to_jiffies(IRDMA_RETRY_PRINT_MS); + spin_unlock_irqrestore(&ae_info->info_lock, flags); + + irdma_dev_err(&iwdev->ibdev, + "qp async event qp_id = %d, ae = 0x%x (%s), qp_cnt = %d\n", + iwqp->sc_qp.qp_uk.qp_id, ae, irdma_get_ae_desc(ae), + retry_cnt); + + return; + } + switch (ae) { + case IRDMA_AE_BAD_CLOSE: + case IRDMA_AE_LLP_CLOSE_COMPLETE: + case IRDMA_AE_LLP_CONNECTION_RESET: + case IRDMA_AE_LLP_FIN_RECEIVED: + case IRDMA_AE_LLP_SYN_RECEIVED: + case IRDMA_AE_LLP_TERMINATE_RECEIVED: + case IRDMA_AE_LLP_DOUBT_REACHABILITY: + case IRDMA_AE_LLP_CONNECTION_ESTABLISHED: + case IRDMA_AE_RESET_SENT: + case IRDMA_AE_TERMINATE_SENT: + case IRDMA_AE_RESET_NOT_SENT: + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, + "qp async avent qp_id = %d, ae = 0x%x (%s), src = %d, ae_cnt = %d\n", + iwqp->sc_qp.qp_uk.qp_id, ae, irdma_get_ae_desc(ae), + iwqp->ae_src, atomic_read(&ae_info->ae_cnt)); + break; + default: + irdma_dev_err(&iwdev->ibdev, + "qp async event qp_id = %d, ae = 0x%x (%s), src = %d, ae_cnt = %d\n", + iwqp->sc_qp.qp_uk.qp_id, ae, irdma_get_ae_desc(ae), + iwqp->ae_src, atomic_read(&ae_info->ae_cnt)); + } +} + /** * irdma_cm_disconn_true - called by worker thread to disconnect qp * @iwqp: associate qp for the connection @@ -3331,11 +3556,15 @@ irdma_cm_disconn_true(struct irdma_qp *iwqp) int err; iwdev = iwqp->iwdev; + + dump_qp_ae_info(iwqp); spin_lock_irqsave(&iwqp->lock, flags); + if (rdma_protocol_roce(&iwdev->ibdev, 1)) { struct ib_qp_attr attr; - if (iwqp->flush_issued || iwqp->sc_qp.qp_uk.destroy_pending) { + if (atomic_read(&iwqp->flush_issued) || + 
iwqp->sc_qp.qp_uk.destroy_pending) { spin_unlock_irqrestore(&iwqp->lock, flags); return; } @@ -3358,10 +3587,8 @@ irdma_cm_disconn_true(struct irdma_qp *iwqp) issue_close = 1; iwqp->cm_id = NULL; irdma_terminate_del_timer(qp); - if (!iwqp->flush_issued) { - iwqp->flush_issued = 1; + if (!atomic_read(&iwqp->flush_issued)) issue_flush = 1; - } } else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) || ((original_ibqp_state == IB_QPS_RTS) && (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) { @@ -3378,10 +3605,8 @@ irdma_cm_disconn_true(struct irdma_qp *iwqp) issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; - if (!iwqp->flush_issued) { - iwqp->flush_issued = 1; + if (!atomic_read(&iwqp->flush_issued)) issue_flush = 1; - } } spin_unlock_irqrestore(&iwqp->lock, flags); @@ -3401,7 +3626,7 @@ irdma_cm_disconn_true(struct irdma_qp *iwqp) spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); return; } - atomic_inc(&iwqp->cm_node->refcnt); + irdma_add_ref_cmnode(iwqp->cm_node); spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); @@ -3424,7 +3649,7 @@ irdma_cm_disconn_true(struct irdma_qp *iwqp) cm_id); irdma_qp_disconnect(iwqp); } - irdma_rem_ref_cm_node(iwqp->cm_node); + irdma_rem_ref_cmnode(iwqp->cm_node); } /** @@ -3544,7 +3769,7 @@ irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwpd = iwqp->iwpd; tagged_offset = (uintptr_t)iwqp->ietf_mem.va; ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len, - IB_ACCESS_LOCAL_WRITE, &tagged_offset); + IB_ACCESS_LOCAL_WRITE, &tagged_offset, false); if (IS_ERR(ibmr)) { ret = -ENOMEM; goto error; @@ -3611,7 +3836,7 @@ irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return 0; error: irdma_free_lsmm_rsrc(iwqp); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); return ret; } @@ -3761,8 +3986,8 @@ irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) irdma_qp_add_ref(&iwqp->ibqp); cm_id->add_ref(cm_id); - if 
(cm_node->state != IRDMA_CM_STATE_OFFLOADED) { - cm_node->state = IRDMA_CM_STATE_SYN_SENT; + if (!irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED)) { + irdma_cm_node_set_state(cm_node, IRDMA_CM_STATE_SYN_SENT); ret = irdma_send_syn(cm_node, 0); if (ret) goto err; @@ -3784,7 +4009,7 @@ err: irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%x:%x:%x:%x", IRDMA_PRINT_IP6(cm_info.rem_addr)); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); iwdev->cm_core.stats_connect_errs++; return ret; @@ -3955,7 +4180,7 @@ irdma_iw_teardown_list_prep(struct irdma_cm_core *cm_core, if ((disconnect_all || (nfo->vlan_id == cm_node->vlan_id && !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) && - atomic_inc_not_zero(&cm_node->refcnt)) + irdma_add_ref_cmnode(cm_node)) list_add(&cm_node->teardown_entry, teardown_list); } } @@ -4089,7 +4314,7 @@ error: cm_id->provider_data = NULL; irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, status); - irdma_rem_ref_cm_node(event->cm_node); + irdma_rem_ref_cmnode(event->cm_node); } /** @@ -4144,20 +4369,20 @@ irdma_cm_event_handler(struct work_struct *work) break; case IRDMA_CM_EVENT_CONNECTED: if (!event->cm_node->cm_id || - event->cm_node->state != IRDMA_CM_STATE_OFFLOADED) + !irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED)) break; irdma_cm_event_connected(event); break; case IRDMA_CM_EVENT_MPA_REJECT: if (!event->cm_node->cm_id || - cm_node->state == IRDMA_CM_STATE_OFFLOADED) + irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED)) break; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); break; case IRDMA_CM_EVENT_ABORTED: if (!event->cm_node->cm_id || - event->cm_node->state == IRDMA_CM_STATE_OFFLOADED) + irdma_cm_node_cmp_state(cm_node, IRDMA_CM_STATE_OFFLOADED)) break; irdma_event_connect_error(event); break; @@ -4167,7 +4392,7 @@ irdma_cm_event_handler(struct work_struct *work) break; } - 
irdma_rem_ref_cm_node(event->cm_node); + irdma_rem_ref_cmnode(cm_node); kfree(event); } @@ -4178,7 +4403,7 @@ irdma_cm_event_handler(struct work_struct *work) static void irdma_cm_post_event(struct irdma_cm_event *event) { - atomic_inc(&event->cm_node->refcnt); + irdma_add_ref_cmnode(event->cm_node); INIT_WORK(&event->event_work, irdma_cm_event_handler); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); } @@ -4219,7 +4444,7 @@ irdma_cm_teardown_connections(struct irdma_device *iwdev, irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (iwdev->rf->reset) irdma_cm_disconn(cm_node->iwqp); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } if (!rdma_protocol_roce(&iwdev->ibdev, 1)) diff --git a/sys/dev/irdma/irdma_cm.h b/sys/dev/irdma/irdma_cm.h index 36cebdb5bf19..26fbee4499c1 100644 --- a/sys/dev/irdma/irdma_cm.h +++ b/sys/dev/irdma/irdma_cm.h @@ -443,6 +443,7 @@ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port); void irdma_send_ack(struct irdma_cm_node *cm_node); void irdma_lpb_nop(struct irdma_sc_qp *qp); -void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node); +bool irdma_add_ref_cmnode(struct irdma_cm_node *cm_node); +void irdma_rem_ref_cmnode(struct irdma_cm_node *cm_node); void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node); #endif /* IRDMA_CM_H */ diff --git a/sys/dev/irdma/irdma_ctrl.c b/sys/dev/irdma/irdma_ctrl.c index 79ed14a60670..c3bddab7f477 100644 --- a/sys/dev/irdma/irdma_ctrl.c +++ b/sys/dev/irdma/irdma_ctrl.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -81,6 +81,24 @@ irdma_get_qp_from_list(struct list_head *head, } /** + * irdma_get_qp_qs - return qs_handle for the qp + * @qp: qp for qset + * + * Returns the queue set that should be used for a given qp. The qos + * mutex should be acquired before calling. + */ +static u16 irdma_get_qp_qs(struct irdma_sc_qp *qp){ + + struct irdma_sc_vsi *vsi = qp->vsi; + u16 qs_handle; + + qs_handle = + vsi->qos[qp->user_pri].qs_handle; + + return qs_handle; +} + +/** * irdma_sc_suspend_resume_qps - suspend/resume all qp's on VSI * @vsi: the VSI struct pointer * @op: Set to IRDMA_OP_RESUME or IRDMA_OP_SUSPEND @@ -96,18 +114,28 @@ irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op) qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); while (qp) { if (op == IRDMA_OP_RESUME) { + if (!qp->suspended) { + qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, + qp); + continue; + } if (!qp->dev->ws_add(vsi, i)) { - qp->qs_handle = - vsi->qos[qp->user_pri].qs_handle; - irdma_cqp_qp_suspend_resume(qp, op); + qp->qs_handle = irdma_get_qp_qs(qp); + if (!irdma_cqp_qp_suspend_resume(qp, op)) + qp->suspended = false; } else { - irdma_cqp_qp_suspend_resume(qp, op); + if (!irdma_cqp_qp_suspend_resume(qp, op)) + qp->suspended = false; irdma_modify_qp_to_err(qp); } } else if (op == IRDMA_OP_SUSPEND) { /* issue cqp suspend command */ - if (!irdma_cqp_qp_suspend_resume(qp, op)) + if ((qp->qp_state == IRDMA_QP_STATE_RTS || + qp->qp_state == IRDMA_QP_STATE_RTR) && + !irdma_cqp_qp_suspend_resume(qp, op)) { atomic_inc(&vsi->qp_suspend_reqs); + qp->suspended = true; + } } qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); } @@ -135,7 +163,7 @@ irdma_set_qos_info(struct irdma_sc_vsi *vsi, struct irdma_l2params *l2p) vsi->qos[i].qs_handle = l2p->qs_handle_list[i]; if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) irdma_init_config_check(&vsi->cfg_check[i], - l2p->up2tc[i], + l2p->up2tc[i], i, l2p->qs_handle_list[i]); 
vsi->qos[i].traffic_class = l2p->up2tc[i]; vsi->qos[i].rel_bw = @@ -197,15 +225,16 @@ irdma_qp_add_qos(struct irdma_sc_qp *qp) { struct irdma_sc_vsi *vsi = qp->vsi; - irdma_debug(qp->dev, IRDMA_DEBUG_DCB, - "DCB: Add qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n", - qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle, - qp->on_qoslist); mutex_lock(&vsi->qos[qp->user_pri].qos_mutex); if (!qp->on_qoslist) { list_add(&qp->list, &vsi->qos[qp->user_pri].qplist); qp->on_qoslist = true; - qp->qs_handle = vsi->qos[qp->user_pri].qs_handle; + qp->qs_handle = irdma_get_qp_qs(qp); + irdma_debug(qp->dev, IRDMA_DEBUG_DCB, + "DCB: Add qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n", + qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle, + qp->on_qoslist); + } mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex); } @@ -362,6 +391,7 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, u64 qw1 = 0; u64 qw2 = 0; u64 temp; + u16 qs_handle; struct irdma_sc_vsi *vsi = info->vsi; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -383,8 +413,10 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->dest_ip[2]) | FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[3])); } - qw2 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QS_HANDLE, - vsi->qos[info->user_pri].qs_handle); + + qs_handle = vsi->qos[info->user_pri].qs_handle; + + qw2 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QS_HANDLE, qs_handle); if (info->vlan_valid) qw2 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANID, info->vlan_id); set_64bit_val(wqe, IRDMA_BYTE_16, qw2); @@ -493,7 +525,6 @@ irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) qp->rcv_tph_en = info->rcv_tph_en; qp->xmit_tph_en = info->xmit_tph_en; qp->qp_uk.first_sq_wq = info->qp_uk_init_info.first_sq_wq; - qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; return 0; } @@ -1167,9 +1198,9 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev, FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) | FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) | 
FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) | - FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | - FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); + /* for FNIC, a PF can send this WQE for a VF */ + hdr |= FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); @@ -1246,7 +1277,8 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_index)); - set_64bit_val(wqe, IRDMA_BYTE_40, info->hmc_fcn_index); + hdr = info->hmc_fcn_index; + set_64bit_val(wqe, IRDMA_BYTE_40, hdr); addr_type = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ? 1 : 0; hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_REG_MR) | @@ -1257,7 +1289,6 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, remote_access) | FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | - FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -1299,6 +1330,7 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DEALLOC_STAG) | FIELD_PREP(IRDMA_CQPSQ_STAG_MR, info->mr) | + FIELD_PREP(IRDMA_CQPSQ_STAG_SKIPFLUSH, info->skip_flush_markers) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -1424,7 +1456,7 @@ irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "FAST_REG WQE", wqe, - IRDMA_QP_WQE_MIN_SIZE); + quanta * IRDMA_QP_WQE_MIN_SIZE); if (sq_info.push_wqe) 
irdma_qp_push_wqe(&qp->qp_uk, wqe, quanta, wqe_idx, post_sq); else if (post_sq) @@ -1970,7 +2002,7 @@ irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, mutex_init(&vsi->qos[i].qos_mutex); INIT_LIST_HEAD(&vsi->qos[i].qplist); } - if (vsi->register_qset) { + if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) { vsi->dev->ws_add = irdma_ws_add; vsi->dev->ws_remove = irdma_ws_remove; vsi->dev->ws_reset = irdma_ws_reset; @@ -1982,23 +2014,6 @@ irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, } /** - * irdma_get_stats_idx - Return stats index - * @vsi: pointer to the vsi - */ -static u16 irdma_get_stats_idx(struct irdma_sc_vsi *vsi){ - struct irdma_stats_inst_info stats_info = {0}; - struct irdma_sc_dev *dev = vsi->dev; - - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { - if (!irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_ALLOCATE, - &stats_info)) - return stats_info.stats_idx; - } - - return IRDMA_INVALID_STATS_IDX; -} - -/** * irdma_vsi_stats_init - Initialize the vsi statistics * @vsi: pointer to the vsi structure * @info: The info structure used for initialization @@ -2030,16 +2045,6 @@ irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, /* when stat allocation is not required default to fcn_id. 
*/ vsi->stats_idx = info->fcn_id; - if (info->alloc_stats_inst) { - u16 stats_idx = irdma_get_stats_idx(vsi); - - if (stats_idx != IRDMA_INVALID_STATS_IDX) { - vsi->stats_inst_alloc = true; - vsi->stats_idx = stats_idx; - vsi->pestat->gather_info.use_stats_inst = true; - vsi->pestat->gather_info.stats_inst_index = stats_idx; - } - } return 0; } @@ -2051,16 +2056,6 @@ irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi) { - struct irdma_stats_inst_info stats_info = {0}; - struct irdma_sc_dev *dev = vsi->dev; - - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { - if (vsi->stats_inst_alloc) { - stats_info.stats_idx = vsi->stats_idx; - irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_FREE, - &stats_info); - } - } if (!vsi->pestat) return; @@ -2136,45 +2131,6 @@ irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, } /** - * irdma_sc_manage_stats_inst - allocate or free stats instance - * @cqp: struct for cqp hw - * @info: stats info structure - * @alloc: alloc vs. 
delete flag - * @scratch: u64 saved to be used during cqp completion - */ -static int -irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, - struct irdma_stats_inst_info *info, - bool alloc, u64 scratch) -{ - __le64 *wqe; - u64 temp; - - wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); - if (!wqe) - return -ENOSPC; - - set_64bit_val(wqe, IRDMA_BYTE_40, - FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id)); - temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) | - FIELD_PREP(IRDMA_CQPSQ_STATS_ALLOC_INST, alloc) | - FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX, - info->use_hmc_fcn_index) | - FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX, info->stats_idx) | - FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_MANAGE_STATS); - - irdma_wmb(); /* make sure WQE is written before valid bit is set */ - - set_64bit_val(wqe, IRDMA_BYTE_24, temp); - - irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_STATS WQE", wqe, - IRDMA_CQP_WQE_SIZE * 8); - - irdma_sc_cqp_post_sq(cqp); - return 0; -} - -/** * irdma_sc_set_up_map - set the up map table * @cqp: struct for cqp hw * @info: User priority map info @@ -2940,7 +2896,7 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 * buf, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_152, info, IRDMA_HMC_IW_MD); - if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) { + if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) { irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_160, info, IRDMA_HMC_IW_OOISC); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_168, info, @@ -2950,7 +2906,7 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 * buf, /* searching for the last object in HMC to find the size of the HMC area. 
*/ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) { - if (info[i].base > max_base) { + if (info[i].base > max_base && info[i].cnt) { max_base = info[i].base; last_hmc_obj = i; } @@ -3079,7 +3035,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 * buf, irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD); - if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) { + if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) { irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC); get_64bit_val(buf, IRDMA_BYTE_168, &temp); @@ -3173,8 +3129,8 @@ irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, { u8 hw_sq_size; - if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || - info->sq_size < IRDMA_CQP_SW_SQSIZE_4 || + if (info->sq_size > IRDMA_CQP_SW_SQSIZE_MAX || + info->sq_size < IRDMA_CQP_SW_SQSIZE_MIN || ((info->sq_size & (info->sq_size - 1)))) return -EINVAL; @@ -3202,6 +3158,7 @@ irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, cqp->protocol_used = info->protocol_used; irdma_memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params)); cqp->en_rem_endpoint_trk = info->en_rem_endpoint_trk; + cqp->timer_slots = info->timer_slots; info->dev->cqp = cqp; IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size); @@ -3262,6 +3219,8 @@ irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err) temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) | FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile); + if (hw_rev == IRDMA_GEN_2) + temp |= FIELD_PREP(IRDMA_CQPHC_TMR_SLOT, cqp->timer_slots); if (hw_rev >= IRDMA_GEN_2) temp |= FIELD_PREP(IRDMA_CQPHC_EN_REM_ENDPOINT_TRK, cqp->en_rem_endpoint_trk); @@ -3836,10 +3795,9 @@ irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) /** * irdma_sc_cceq_create - create cceq * @ceq: ceq sc structure - * @scratch: u64 saved to be used during cqp completion */ int -irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) 
+irdma_sc_cceq_create(struct irdma_sc_ceq *ceq) { int ret_code; struct irdma_sc_dev *dev = ceq->dev; @@ -3850,7 +3808,7 @@ irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) if (ret_code) return ret_code; } - ret_code = irdma_sc_ceq_create(ceq, scratch, true); + ret_code = irdma_sc_ceq_create(ceq, 0, true); if (!ret_code) return irdma_sc_cceq_create_done(ceq); @@ -4081,7 +4039,9 @@ irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, bool post_sq) u64 hdr; dev = aeq->dev; - writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]); + + if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) + writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]); cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -4180,6 +4140,7 @@ irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LLP_DOUBT_REACHABILITY: case IRDMA_AE_LLP_CONNECTION_ESTABLISHED: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_RESET_SENT: case IRDMA_AE_TERMINATE_SENT: case IRDMA_AE_RESET_NOT_SENT: @@ -4388,8 +4349,10 @@ irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq) u64 hdr; int ret_code = 0; u32 tail, val, error; + struct irdma_sc_dev *dev; cqp = ccq->dev->cqp; + dev = ccq->dev; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; @@ -4418,10 +4381,11 @@ irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq) if (post_sq) { irdma_sc_cqp_post_sq(cqp); ret_code = irdma_cqp_poll_registers(cqp, tail, - cqp->dev->hw_attrs.max_done_count); + dev->hw_attrs.max_done_count); } cqp->process_cqp_sds = irdma_update_sds_noccq; + dev->ccq = NULL; return ret_code; } @@ -4792,8 +4756,11 @@ irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, int irdma_get_rdma_features(struct irdma_sc_dev *dev) { - int ret_code, byte_idx, feat_type, feat_cnt, feat_idx; struct irdma_dma_mem feat_buf; + u16 feat_cnt; + u16 feat_idx; + u8 feat_type; + int ret_code; u64 temp; feat_buf.size = IRDMA_FEATURE_BUF_SIZE; @@ -4834,13 
+4801,18 @@ irdma_get_rdma_features(struct irdma_sc_dev *dev) } } - irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "QUERY RDMA FEATURES", feat_buf.va, - feat_cnt * 8); + irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "QUERY RDMA FEATURES", + feat_buf.va, feat_cnt * 8); - for (byte_idx = 0, feat_idx = 0; feat_idx < min(feat_cnt, IRDMA_MAX_FEATURES); - feat_idx++, byte_idx += 8) { - get_64bit_val(feat_buf.va, byte_idx, &temp); + for (feat_idx = 0; feat_idx < feat_cnt; feat_idx++) { + get_64bit_val(feat_buf.va, feat_idx * 8, &temp); feat_type = FIELD_GET(IRDMA_FEATURE_TYPE, temp); + + if (feat_type >= IRDMA_MAX_FEATURES) { + irdma_debug(dev, IRDMA_DEBUG_DEV, + "unknown feature type %u\n", feat_type); + continue; + } dev->feature_info[feat_type] = temp; } exit: @@ -4899,6 +4871,28 @@ cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, } /** + * irdma_cfg_sd_mem - allocate sd memory + * @dev: sc device struct + * @hmc_info: ptr to irdma_hmc_obj_info struct + */ +static int +irdma_cfg_sd_mem(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info) +{ + struct irdma_virt_mem virt_mem; + u32 mem_size; + + mem_size = sizeof(struct irdma_hmc_sd_entry) * hmc_info->sd_table.sd_cnt; + virt_mem.size = mem_size; + virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL); + if (!virt_mem.va) + return -ENOMEM; + hmc_info->sd_table.sd_entry = virt_mem.va; + + return 0; +} + +/** * irdma_cfg_fpm_val - configure HMC objects * @dev: sc device struct * @qp_count: desired qp count @@ -4906,10 +4900,8 @@ cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) { - struct irdma_virt_mem virt_mem; - u32 i, mem_size; u32 qpwanted, mrwanted, pblewanted; - u32 hte; + u32 hte, i; u32 sd_needed; u32 sd_diff; u32 loop_count = 0; @@ -4934,7 +4926,7 @@ irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; sd_needed = irdma_est_sd(dev, hmc_info); - irdma_debug(dev, IRDMA_DEBUG_HMC, "sd count %d where max sd is %d\n", 
+ irdma_debug(dev, IRDMA_DEBUG_HMC, "sd count %u where max sd is %u\n", hmc_info->sd_table.sd_cnt, max_sds); qpwanted = min(qp_count, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt); @@ -4945,7 +4937,7 @@ irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt; irdma_debug(dev, IRDMA_DEBUG_HMC, - "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d, mc=%d, av=%d\n", + "req_qp=%d max_sd=%u, max_qp = %u, max_cq=%u, max_mr=%u, max_pble=%u, mc=%d, av=%u\n", qp_count, max_sds, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt, @@ -4959,8 +4951,7 @@ irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt; hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].max_cnt; - if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) - hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1; + hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1; while (irdma_q1_cnt(dev, hmc_info, qpwanted) > hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].max_cnt) qpwanted /= 2; @@ -5013,11 +5004,12 @@ irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) if (!(loop_count % 2) && qpwanted > 128) { qpwanted /= 2; } else { - mrwanted /= 2; pblewanted /= 2; + mrwanted /= 2; } continue; } + if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF && pblewanted > (512 * FPM_MULTIPLIER * sd_diff)) { pblewanted -= 256 * FPM_MULTIPLIER * sd_diff; @@ -5043,7 +5035,7 @@ irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) if (sd_needed > max_sds) { irdma_debug(dev, IRDMA_DEBUG_HMC, - "cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n", + "cfg_fpm failed loop_cnt=%u, sd_needed=%u, max sd count %u\n", loop_count, sd_needed, hmc_info->sd_table.sd_cnt); return -EINVAL; } @@ -5073,18 +5065,7 @@ irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) return ret_code; } - mem_size = sizeof(struct irdma_hmc_sd_entry) * - 
(hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1); - virt_mem.size = mem_size; - virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL); - if (!virt_mem.va) { - irdma_debug(dev, IRDMA_DEBUG_HMC, - "failed to allocate memory for sd_entry buffer\n"); - return -ENOMEM; - } - hmc_info->sd_table.sd_entry = virt_mem.va; - - return ret_code; + return irdma_cfg_sd_mem(dev, hmc_info); } /** @@ -5098,7 +5079,6 @@ irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, { int status; struct irdma_dma_mem val_mem; - bool alloc = false; dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++; switch (pcmdinfo->cqp_cmd) { @@ -5204,15 +5184,6 @@ irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, true, IRDMA_CQP_WAIT_EVENT); break; - case IRDMA_OP_STATS_ALLOCATE: - alloc = true; - /* fallthrough */ - case IRDMA_OP_STATS_FREE: - status = irdma_sc_manage_stats_inst(pcmdinfo->in.u.stats_manage.cqp, - &pcmdinfo->in.u.stats_manage.info, - alloc, - pcmdinfo->in.u.stats_manage.scratch); - break; case IRDMA_OP_STATS_GATHER: status = irdma_sc_gather_stats(pcmdinfo->in.u.stats_gather.cqp, &pcmdinfo->in.u.stats_gather.info, @@ -5381,6 +5352,7 @@ irdma_process_cqp_cmd(struct irdma_sc_dev *dev, status = irdma_exec_cqp_cmd(dev, pcmdinfo); else list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head); + pcmdinfo->cqp_cmd_exec_status = status; spin_unlock_irqrestore(&dev->cqp_lock, flags); return status; } @@ -5389,7 +5361,7 @@ irdma_process_cqp_cmd(struct irdma_sc_dev *dev, * irdma_process_bh - called from tasklet for cqp list * @dev: sc device struct */ -int +void irdma_process_bh(struct irdma_sc_dev *dev) { int status = 0; @@ -5402,10 +5374,9 @@ irdma_process_bh(struct irdma_sc_dev *dev) pcmdinfo = (struct cqp_cmds_info *)irdma_remove_cqp_head(dev); status = irdma_exec_cqp_cmd(dev, pcmdinfo); if (status) - break; + pcmdinfo->cqp_cmd_exec_status = status; } spin_unlock_irqrestore(&dev->cqp_lock, flags); - return status; } /** @@ -5464,7 +5435,7 @@ irdma_wait_pe_ready(struct irdma_sc_dev *dev) if (statuscpu0 == 0x80 && 
statuscpu1 == 0x80 && statuscpu2 == 0x80) return 0; - mdelay(1000); + mdelay(100); } while (retrycount++ < dev->hw_attrs.max_pe_ready_count); return -1; } @@ -5566,9 +5537,9 @@ static inline u64 irdma_stat_val(const u64 *stats_val, u16 byteoff, static inline u64 irdma_stat_delta(u64 new_val, u64 old_val, u64 max_val) { if (new_val >= old_val) return new_val - old_val; - else - /* roll-over case */ - return max_val - old_val + new_val + 1; + + /* roll-over case */ + return max_val - old_val + new_val + 1; } /** diff --git a/sys/dev/irdma/irdma_defs.h b/sys/dev/irdma/irdma_defs.h index fd3bf82c9ad6..a4bed8d5f93d 100644 --- a/sys/dev/irdma/irdma_defs.h +++ b/sys/dev/irdma/irdma_defs.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -66,7 +66,6 @@ #define IRDMA_DSCP_NUM_VAL 64 #define IRDMA_MAX_TRAFFIC_CLASS 8 #define IRDMA_MAX_STATS_COUNT 128 -#define IRDMA_FIRST_NON_PF_STAT 4 #define IRDMA_MIN_MTU_IPV4 576 #define IRDMA_MIN_MTU_IPV6 1280 @@ -115,11 +114,31 @@ #define IRDMA_BYTE_200 200 #define IRDMA_BYTE_208 208 #define IRDMA_BYTE_216 216 +#define IRDMA_BYTE_224 224 +#define IRDMA_BYTE_232 232 +#define IRDMA_BYTE_240 240 +#define IRDMA_BYTE_248 248 +#define IRDMA_BYTE_256 256 +#define IRDMA_BYTE_264 264 +#define IRDMA_BYTE_272 272 +#define IRDMA_BYTE_280 280 +#define IRDMA_BYTE_288 288 +#define IRDMA_BYTE_296 296 +#define IRDMA_BYTE_304 304 +#define IRDMA_BYTE_312 312 +#define IRDMA_BYTE_320 320 +#define IRDMA_BYTE_328 328 +#define IRDMA_BYTE_336 336 +#define IRDMA_BYTE_344 344 +#define IRDMA_BYTE_352 352 +#define IRDMA_BYTE_360 360 +#define IRDMA_BYTE_368 368 +#define IRDMA_BYTE_376 376 +#define IRDMA_BYTE_384 384 #define IRDMA_CQP_WAIT_POLL_REGS 1 #define IRDMA_CQP_WAIT_POLL_CQ 2 #define IRDMA_CQP_WAIT_EVENT 3 
- #define IRDMA_AE_SOURCE_RSVD 0x0 #define IRDMA_AE_SOURCE_RQ 0x1 #define IRDMA_AE_SOURCE_RQ_0011 0x3 @@ -157,8 +176,8 @@ #define IRDMA_TCP_STATE_RESERVED_3 14 #define IRDMA_TCP_STATE_RESERVED_4 15 -#define IRDMA_CQP_SW_SQSIZE_4 4 -#define IRDMA_CQP_SW_SQSIZE_2048 2048 +#define IRDMA_CQP_SW_SQSIZE_MIN 4 +#define IRDMA_CQP_SW_SQSIZE_MAX 2048 #define IRDMA_CQ_TYPE_IWARP 1 #define IRDMA_CQ_TYPE_ILQ 2 @@ -202,6 +221,8 @@ #define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 +#define IRDMA_DEFAULT_MAX_PUSH_LEN 8192 + #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 @@ -222,6 +243,7 @@ #define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b #define IRDMAQP_OP_NOP 0x0c #define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d + #define IRDMAQP_OP_GEN_RTS_AE 0x30 enum irdma_cqp_op_type { @@ -272,9 +294,9 @@ enum irdma_cqp_op_type { IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46, IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47, IRDMA_OP_CQ_MODIFY = 48, - + IRDMA_OP_WS_MOVE = 49, /* Must be last entry */ - IRDMA_MAX_CQP_OPS = 49, + IRDMA_MAX_CQP_OPS = 50, }; /* CQP SQ WQES */ @@ -322,6 +344,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQP_OP_MANAGE_STATS 0x2d #define IRDMA_CQP_OP_GATHER_STATS 0x2e #define IRDMA_CQP_OP_UP_MAP 0x2f +#define IRDMA_CQP_OP_MOVE_WS_NODES 0x34 #ifndef LS_64_1 #define LS_64_1(val, bits) ((u64)(uintptr_t)(val) << (bits)) @@ -420,6 +443,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_WS_VMVFNUM GENMASK_ULL(51, 42) #define IRDMA_CQPSQ_WS_OP_S 32 #define IRDMA_CQPSQ_WS_OP GENMASK_ULL(37, 32) +#define IRDMA_CQPSQ_WS_MOVE_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_WS_PARENTID_S 16 #define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(25, 16) #define IRDMA_CQPSQ_WS_NODEID_S 0 @@ -493,6 +517,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPHC_EN_REM_ENDPOINT_TRK_S 3 #define IRDMA_CQPHC_EN_REM_ENDPOINT_TRK BIT_ULL(3) +#define IRDMA_CQPHC_TMR_SLOT_S 16 +#define IRDMA_CQPHC_TMR_SLOT GENMASK_ULL(19, 16) #define IRDMA_CQPHC_ENABLED_VFS_S 32 #define IRDMA_CQPHC_ENABLED_VFS GENMASK_ULL(37, 32) @@ -672,10 
+698,10 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_QP_QPCTX IRDMA_CQPHC_QPCTX #define IRDMA_CQPSQ_QP_QPID_S 0 -#define IRDMA_CQPSQ_QP_QPID_M (0xFFFFFFUL) +#define IRDMA_CQPSQ_QP_QPID GENMASK_ULL(23, 0) #define IRDMA_CQPSQ_QP_OP_S 32 -#define IRDMA_CQPSQ_QP_OP_M IRDMACQ_OP_M +#define IRDMA_CQPSQ_QP_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_QP_ORDVALID_S 42 #define IRDMA_CQPSQ_QP_ORDVALID BIT_ULL(42) #define IRDMA_CQPSQ_QP_TOECTXVALID_S 43 @@ -751,6 +777,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STAG_MR BIT_ULL(43) #define IRDMA_CQPSQ_STAG_MWTYPE_S 42 #define IRDMA_CQPSQ_STAG_MWTYPE BIT_ULL(42) +#define IRDMA_CQPSQ_STAG_SKIPFLUSH_S 40 +#define IRDMA_CQPSQ_STAG_SKIPFLUSH BIT_ULL(40) #define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY_S 58 #define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY BIT_ULL(58) @@ -767,8 +795,6 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STAG_VABASEDTO BIT_ULL(59) #define IRDMA_CQPSQ_STAG_USEHMCFNIDX_S 60 #define IRDMA_CQPSQ_STAG_USEHMCFNIDX BIT_ULL(60) -#define IRDMA_CQPSQ_STAG_USEPFRID_S 61 -#define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61) #define IRDMA_CQPSQ_STAG_PBA_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX @@ -882,12 +908,15 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0) #define IRDMA_COMMIT_FPM_QPCNT_S 0 -#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(18, 0) +#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(20, 0) #define IRDMA_COMMIT_FPM_BASE_S 32 #define IRDMA_CQPSQ_CFPM_HMCFNID_S 0 #define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_CFPM_HW_FLUSH_TIMER_DISABLE_S 43 +#define IRDMA_CQPSQ_CFPM_HW_FLUSH_TIMER_DISABLE BIT_ULL(43) + #define IRDMA_CQPSQ_FWQE_AECODE_S 0 #define IRDMA_CQPSQ_FWQE_AECODE GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_FWQE_AESOURCE_S 16 @@ -1123,9 +1152,9 @@ enum irdma_cqp_op_type { #define IRDMAQPC_RNRNAK_THRESH_S 54 #define IRDMAQPC_RNRNAK_THRESH GENMASK_ULL(56, 54) #define IRDMAQPC_TXCQNUM_S 0 -#define IRDMAQPC_TXCQNUM GENMASK_ULL(18, 0) 
+#define IRDMAQPC_TXCQNUM GENMASK_ULL(24, 0) #define IRDMAQPC_RXCQNUM_S 32 -#define IRDMAQPC_RXCQNUM GENMASK_ULL(50, 32) +#define IRDMAQPC_RXCQNUM GENMASK_ULL(56, 32) #define IRDMAQPC_STAT_INDEX_S 0 #define IRDMAQPC_STAT_INDEX GENMASK_ULL(6, 0) #define IRDMAQPC_Q2ADDR_S 8 @@ -1213,6 +1242,8 @@ enum irdma_cqp_op_type { #define IRDMA_FEATURE_TYPE GENMASK_ULL(63, 48) #define IRDMA_RSVD_S 41 #define IRDMA_RSVD GENMASK_ULL(55, 41) +#define IRDMA_FEATURE_RSRC_MAX_S 0 +#define IRDMA_FEATURE_RSRC_MAX GENMASK_ULL(31, 0) #define IRDMAQPSQ_OPCODE_S 32 #define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32) @@ -1258,7 +1289,7 @@ enum irdma_cqp_op_type { #define IRDMAQPSQ_DESTQPN_S 32 #define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32) #define IRDMAQPSQ_AHID_S 0 -#define IRDMAQPSQ_AHID GENMASK_ULL(16, 0) +#define IRDMAQPSQ_AHID GENMASK_ULL(24, 0) #define IRDMAQPSQ_INLINEDATAFLAG_S 57 #define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57) @@ -1347,7 +1378,7 @@ enum irdma_cqp_op_type { #define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX_S 0 #define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX GENMASK_ULL(13, 0) #define IRDMA_QUERY_FPM_MAX_PE_SDS_S 32 -#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(45, 32) +#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(44, 32) #define IRDMA_QUERY_FPM_MAX_CEQS_S 0 #define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0) @@ -1422,9 +1453,9 @@ enum irdma_cqp_op_type { #define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \ { \ u32 size; \ - size = (_ring).size; \ + size = IRDMA_RING_SIZE(_ring); \ if (!IRDMA_RING_FULL_ERR(_ring)) { \ - (_ring).head = ((_ring).head + 1) % size; \ + IRDMA_RING_CURRENT_HEAD(_ring) = (IRDMA_RING_CURRENT_HEAD(_ring) + 1) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = -ENOSPC; \ @@ -1433,79 +1464,40 @@ enum irdma_cqp_op_type { #define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ { \ u32 size; \ - size = (_ring).size; \ + size = IRDMA_RING_SIZE(_ring); \ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \ - (_ring).head = ((_ring).head + (_count)) % size; \ - 
(_retcode) = 0; \ - } else { \ - (_retcode) = -ENOSPC; \ - } \ - } -#define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ - { \ - u32 size; \ - size = (_ring).size; \ - if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \ - (_ring).head = ((_ring).head + 1) % size; \ - (_retcode) = 0; \ - } else { \ - (_retcode) = -ENOSPC; \ - } \ - } -#define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ - { \ - u32 size; \ - size = (_ring).size; \ - if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \ - (_ring).head = ((_ring).head + (_count)) % size; \ + IRDMA_RING_CURRENT_HEAD(_ring) = (IRDMA_RING_CURRENT_HEAD(_ring) + (_count)) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = -ENOSPC; \ } \ } -#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ - (_ring).head = ((_ring).head + (_count)) % (_ring).size -#define IRDMA_RING_MOVE_TAIL(_ring) \ - (_ring).tail = ((_ring).tail + 1) % (_ring).size +#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ + (IRDMA_RING_CURRENT_HEAD(_ring) = (IRDMA_RING_CURRENT_HEAD(_ring) + (_count)) % IRDMA_RING_SIZE(_ring)) #define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) \ - (_ring).head = ((_ring).head + 1) % (_ring).size + IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, 1) #define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ - (_ring).tail = ((_ring).tail + (_count)) % (_ring).size + IRDMA_RING_CURRENT_TAIL(_ring) = (IRDMA_RING_CURRENT_TAIL(_ring) + (_count)) % IRDMA_RING_SIZE(_ring) + +#define IRDMA_RING_MOVE_TAIL(_ring) \ + IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, 1) #define IRDMA_RING_SET_TAIL(_ring, _pos) \ - (_ring).tail = (_pos) % (_ring).size + WRITE_ONCE(IRDMA_RING_CURRENT_TAIL(_ring), (_pos) % IRDMA_RING_SIZE(_ring)) #define IRDMA_RING_FULL_ERR(_ring) \ ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ - ) - -#define IRDMA_ERR_RING_FULL2(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \ - ) - -#define IRDMA_ERR_RING_FULL3(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == 
((_ring).size - 3)) \ + (IRDMA_RING_USED_QUANTA(_ring) == (IRDMA_RING_SIZE(_ring) - 1)) \ ) #define IRDMA_SQ_RING_FULL_ERR(_ring) \ ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 257)) \ + (IRDMA_RING_USED_QUANTA(_ring) == (IRDMA_RING_SIZE(_ring) - 257)) \ ) -#define IRDMA_ERR_SQ_RING_FULL2(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \ - ) -#define IRDMA_ERR_SQ_RING_FULL3(_ring) \ - ( \ - (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \ - ) #define IRDMA_RING_MORE_WORK(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) != 0) \ @@ -1513,17 +1505,17 @@ enum irdma_cqp_op_type { #define IRDMA_RING_USED_QUANTA(_ring) \ ( \ - (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \ + ((READ_ONCE(IRDMA_RING_CURRENT_HEAD(_ring)) + IRDMA_RING_SIZE(_ring) - READ_ONCE(IRDMA_RING_CURRENT_TAIL(_ring))) % IRDMA_RING_SIZE(_ring)) \ ) #define IRDMA_RING_FREE_QUANTA(_ring) \ ( \ - ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \ + (IRDMA_RING_SIZE(_ring) - IRDMA_RING_USED_QUANTA(_ring) - 1) \ ) #define IRDMA_SQ_RING_FREE_QUANTA(_ring) \ ( \ - ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \ + (IRDMA_RING_SIZE(_ring) - IRDMA_RING_USED_QUANTA(_ring) - 257) \ ) #define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ diff --git a/sys/dev/irdma/irdma_hmc.c b/sys/dev/irdma/irdma_hmc.c index a3c47c8b1434..35c9373b9d86 100644 --- a/sys/dev/irdma/irdma_hmc.c +++ b/sys/dev/irdma/irdma_hmc.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -271,12 +271,18 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, bool pd_error = false; int ret_code = 0; - if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) + if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) { + irdma_debug(dev, IRDMA_DEBUG_ERR, + "invalid hmc obj type %u, start = %u, req cnt %u, cnt = %u\n", + info->rsrc_type, info->start_idx, info->count, + info->hmc_info->hmc_obj[info->rsrc_type].cnt); + return -EINVAL; + } if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) { - irdma_debug(dev, IRDMA_DEBUG_HMC, + irdma_debug(dev, IRDMA_DEBUG_ERR, "error type %u, start = %u, req cnt %u, cnt = %u\n", info->rsrc_type, info->start_idx, info->count, info->hmc_info->hmc_obj[info->rsrc_type].cnt); diff --git a/sys/dev/irdma/irdma_hw.c b/sys/dev/irdma/irdma_hw.c index 64c05b8663e0..9078a5a19b86 100644 --- a/sys/dev/irdma/irdma_hw.c +++ b/sys/dev/irdma/irdma_hw.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2025 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -147,8 +147,9 @@ irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq) sc_ceq = &ceq->sc_ceq; do { spin_lock_irqsave(&ceq->ce_lock, flags); + cq = irdma_sc_process_ceq(dev, sc_ceq); - if (!cq) { + if (!cq || rf->reset) { spin_unlock_irqrestore(&ceq->ce_lock, flags); break; } @@ -203,8 +204,10 @@ irdma_complete_cqp_request(struct irdma_cqp *cqp, /** * irdma_process_aeq - handle aeq events * @rf: RDMA PCI function + * + * Return: True if an AE was processed. 
*/ -static void +static bool irdma_process_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; @@ -219,11 +222,10 @@ irdma_process_aeq(struct irdma_pci_f *rf) struct irdma_device *iwdev = rf->iwdev; struct irdma_qp_host_ctx_info *ctx_info = NULL; unsigned long flags; - u32 aeqcnt = 0; if (!sc_aeq->size) - return; + return false; do { memset(info, 0, sizeof(*info)); @@ -231,7 +233,16 @@ irdma_process_aeq(struct irdma_pci_f *rf) if (ret) break; + if (info->aeqe_overflow) { + irdma_dev_err(&iwdev->ibdev, "AEQ has overflowed\n"); + rf->reset = true; + rf->gen_ops.request_reset(rf); + return (aeqcnt > 0); + } + aeqcnt++; + atomic_inc(&iwdev->ae_info.ae_cnt); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, "ae_id = 0x%x (%s), is_qp = %d, qp_id = %d, tcp_state = %d, iwarp_state = %d, ae_src = %d\n", info->ae_id, irdma_get_ae_desc(info->ae_id), @@ -265,8 +276,12 @@ irdma_process_aeq(struct irdma_pci_f *rf) spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = info->tcp_state; iwqp->hw_iwarp_state = info->iwarp_state; - if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE) + + if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE) { iwqp->last_aeq = info->ae_id; + iwqp->ae_src = info->ae_src; + } + spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; } else { @@ -397,10 +412,6 @@ irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: - irdma_dev_err(&iwdev->ibdev, - "AEQ: abnormal ae_id = 0x%x (%s), is_qp = %d, qp_id = %d, ae_source = %d\n", - info->ae_id, irdma_get_ae_desc(info->ae_id), - info->qp, info->qp_cq_id, info->ae_src); if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) { ctx_info->roce_info->err_rq_idx_valid = info->err_rq_idx_valid; if (info->rq) { @@ -435,6 +446,8 @@ irdma_process_aeq(struct irdma_pci_f *rf) if (aeqcnt) irdma_sc_repost_aeq_entries(dev, aeqcnt); + + return (aeqcnt > 0); } /** @@ -449,11 +462,11 @@ irdma_ena_intr(struct irdma_sc_dev *dev, u32 
msix_id) } /** - * irdma_dpc - tasklet for aeq and ceq 0 + * irdma_aeq_ceq0_tasklet_cb - tasklet for aeq and ceq 0 * @t: tasklet_struct ptr */ static void -irdma_dpc(unsigned long t) +irdma_aeq_ceq0_tasklet_cb(unsigned long t) { struct irdma_pci_f *rf = from_tasklet(rf, (struct tasklet_struct *)t, dpc_tasklet); @@ -465,11 +478,11 @@ irdma_dpc(unsigned long t) } /** - * irdma_ceq_dpc - dpc handler for CEQ + * irdma_ceq_tasklet_cb - tasklet handler for CEQ * @t: tasklet_struct ptr */ static void -irdma_ceq_dpc(unsigned long t) +irdma_ceq_tasklet_cb(unsigned long t) { struct irdma_ceq *iwceq = from_tasklet(iwceq, (struct tasklet_struct *)t, dpc_tasklet); @@ -502,7 +515,7 @@ irdma_save_msix_info(struct irdma_pci_f *rf) size = sizeof(struct irdma_msix_vector) * rf->msix_count; size += sizeof(*iw_qvlist); - size += sizeof(*iw_qvinfo) * rf->msix_count - 1; + size += sizeof(*iw_qvinfo) * (rf->msix_count - 1); rf->iw_msixtbl = kzalloc(size, GFP_KERNEL); if (!rf->iw_msixtbl) return -ENOMEM; @@ -538,11 +551,11 @@ irdma_save_msix_info(struct irdma_pci_f *rf) } /** - * irdma_irq_handler - interrupt handler for aeq and ceq0 + * irdma_aeq_ceq0_irq_handler - interrupt handler for aeq and ceq0 * @data: RDMA PCI function */ static void -irdma_irq_handler(void *data) +irdma_aeq_ceq0_irq_handler(void *data) { struct irdma_pci_f *rf = data; @@ -550,11 +563,11 @@ irdma_irq_handler(void *data) } /** - * irdma_ceq_handler - interrupt handler for ceq + * irdma_ceq_irq_handler - interrupt handler for ceq * @data: ceq pointer */ static void -irdma_ceq_handler(void *data) +irdma_ceq_irq_handler(void *data) { struct irdma_ceq *iwceq = data; @@ -625,8 +638,6 @@ irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) struct irdma_cqp *cqp = &rf->cqp; int status = 0; - if (rf->cqp_cmpl_wq) - destroy_workqueue(rf->cqp_cmpl_wq); status = irdma_sc_cqp_destroy(dev->cqp, free_hwcqp); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "Destroy CQP failed %d\n", status); @@ -794,6 +805,8 @@ 
irdma_destroy_ccq(struct irdma_pci_f *rf) struct irdma_ccq *ccq = &rf->ccq; int status = 0; + if (rf->cqp_cmpl_wq) + destroy_workqueue(rf->cqp_cmpl_wq); if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); if (status) @@ -964,7 +977,7 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, static int irdma_create_cqp(struct irdma_pci_f *rf) { - u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048; + u32 sqsize = IRDMA_CQP_SW_SQSIZE_MAX; struct irdma_dma_mem mem; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp_init_info cqp_init_info = {0}; @@ -1012,6 +1025,7 @@ irdma_create_cqp(struct irdma_pci_f *rf) cqp_init_info.scratch_array = cqp->scratch_array; cqp_init_info.protocol_used = rf->protocol_used; cqp_init_info.en_rem_endpoint_trk = rf->en_rem_endpoint_trk; + cqp_init_info.timer_slots = rf->timer_slots; memcpy(&cqp_init_info.dcqcn_params, &rf->dcqcn_params, sizeof(cqp_init_info.dcqcn_params)); @@ -1077,12 +1091,13 @@ irdma_create_ccq(struct irdma_pci_f *rf) struct irdma_ccq_init_info info = {0}; struct irdma_ccq *ccq = &rf->ccq; int status; + int ccq_size = IW_CCQ_SIZE; dev->ccq = &ccq->sc_cq; dev->ccq->dev = dev; info.dev = dev; ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area); - ccq->mem_cq.size = sizeof(struct irdma_cqe) * IW_CCQ_SIZE; + ccq->mem_cq.size = sizeof(struct irdma_cqe) * ccq_size; ccq->mem_cq.va = irdma_allocate_dma_mem(dev->hw, &ccq->mem_cq, ccq->mem_cq.size, IRDMA_CQ0_ALIGNMENT); @@ -1099,7 +1114,7 @@ irdma_create_ccq(struct irdma_pci_f *rf) /* populate the ccq init info */ info.cq_base = ccq->mem_cq.va; info.cq_pa = ccq->mem_cq.pa; - info.num_elem = IW_CCQ_SIZE; + info.num_elem = ccq_size; info.shadow_area = ccq->shadow_area.va; info.shadow_area_pa = ccq->shadow_area.pa; info.ceqe_mask = false; @@ -1205,8 +1220,8 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, if (rf->msix_shared && !ceq_id) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-AEQCEQ-0", dev_name(&rf->pcidev->dev)); - 
tasklet_setup(&rf->dpc_tasklet, irdma_dpc); - status = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); + tasklet_setup(&rf->dpc_tasklet, irdma_aeq_ceq0_tasklet_cb); + status = irdma_irq_request(rf, msix_vec, irdma_aeq_ceq0_irq_handler, rf); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); @@ -1214,9 +1229,9 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-CEQ-%d", dev_name(&rf->pcidev->dev), ceq_id); - tasklet_setup(&iwceq->dpc_tasklet, irdma_ceq_dpc); + tasklet_setup(&iwceq->dpc_tasklet, irdma_ceq_tasklet_cb); - status = irdma_irq_request(rf, msix_vec, irdma_ceq_handler, iwceq); + status = irdma_irq_request(rf, msix_vec, irdma_ceq_irq_handler, iwceq); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); @@ -1243,8 +1258,8 @@ irdma_cfg_aeq_vector(struct irdma_pci_f *rf) if (!rf->msix_shared) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-AEQ", dev_name(&rf->pcidev->dev)); - tasklet_setup(&rf->dpc_tasklet, irdma_dpc); - status = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); + tasklet_setup(&rf->dpc_tasklet, irdma_aeq_ceq0_tasklet_cb); + status = irdma_irq_request(rf, msix_vec, irdma_aeq_ceq0_irq_handler, rf); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); @@ -1277,7 +1292,6 @@ irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, int status; struct irdma_ceq_init_info info = {0}; struct irdma_sc_dev *dev = &rf->sc_dev; - u64 scratch; u32 ceq_size; info.ceq_id = ceq_id; @@ -1300,14 +1314,13 @@ irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; info.vsi = vsi; - scratch = (uintptr_t)&rf->cqp.sc_cqp; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if 
(dev->ceq_valid) status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_CREATE); else - status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); + status = irdma_sc_cceq_create(&iwceq->sc_ceq); } if (status) { @@ -1576,7 +1589,7 @@ irdma_initialize_ilq(struct irdma_device *iwdev) info.buf_size = 1024; info.tx_buf_cnt = 2 * info.sq_size; info.receive = irdma_receive_ilq; - info.xmit_complete = irdma_free_sqbuf; + info.xmit_complete = irdma_cm_ilq_cmpl_handler; status = irdma_puda_create_rsrc(&iwdev->vsi, &info); if (status) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR, "ilq create fail\n"); @@ -1827,6 +1840,16 @@ irdma_setup_init_state(struct irdma_pci_f *rf) if (status) goto clean_obj_mem; + /* + * Apply sysctl settings to max_hw_ird/ord + */ + rf->sc_dev.hw_attrs.max_hw_ird = irdma_sysctl_max_ird; + rf->sc_dev.hw_attrs.max_hw_ord = irdma_sysctl_max_ord; + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, + "using max_hw_ird = %d and max_hw_ord = %d\n", + rf->sc_dev.hw_attrs.max_hw_ird, + rf->sc_dev.hw_attrs.max_hw_ord); + return 0; clean_obj_mem: @@ -2060,7 +2083,7 @@ irdma_ctrl_init_hw(struct irdma_pci_f *rf) break; rf->init_state = CEQ0_CREATED; /* Handles processing of CQP completions */ - rf->cqp_cmpl_wq = alloc_ordered_workqueue("cqp_cmpl_wq", + rf->cqp_cmpl_wq = alloc_ordered_workqueue("irdma-cqp_cmpl_wq", WQ_HIGHPRI | WQ_UNBOUND); if (!rf->cqp_cmpl_wq) { status = -ENOMEM; @@ -2373,6 +2396,7 @@ irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx) cqp_info->cqp_cmd = IRDMA_OP_ADD_LOCAL_MAC_ENTRY; cqp_info->in.u.add_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); @@ -2406,6 +2430,8 @@ irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) cqp_info->post_sq = 1; cqp_info->in.u.alloc_local_mac_entry.cqp = &iwcqp->sc_cqp; 
cqp_info->in.u.alloc_local_mac_entry.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; + status = irdma_handle_cqp_op(rf, cqp_request); if (!status) *mac_tbl_idx = (u16)cqp_request->compl_info.op_ret_val; @@ -2436,7 +2462,6 @@ irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; - memset(info, 0, sizeof(*info)); info->add = add_port; info->port = accel_local_port; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY; @@ -2519,35 +2544,22 @@ irdma_del_apbvt(struct irdma_device *iwdev, spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); } -/** - * irdma_manage_arp_cache - manage hw arp cache - * @rf: RDMA PCI function - * @mac_addr: mac address ptr - * @ip_addr: ip addr for arp cache - * @action: add, delete or modify - */ void -irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, - u32 *ip_addr, u32 action) +irdma_arp_cqp_op(struct irdma_pci_f *rf, u16 arp_index, + const unsigned char *mac_addr, u32 action) { struct irdma_add_arp_cache_entry_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - int arp_index; - - arp_index = irdma_arp_table(rf, ip_addr, mac_addr, action); - if (arp_index == -1) - return; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) return; cqp_info = &cqp_request->info; - if (action == IRDMA_ARP_ADD) { + if (action == IRDMA_ARP_ADD_UPDATE) { cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY; info = &cqp_info->in.u.add_arp_cache_entry.info; - memset(info, 0, sizeof(*info)); info->arp_index = (u16)arp_index; info->permanent = true; ether_addr_copy(info->mac_addr, mac_addr); @@ -2568,6 +2580,26 @@ irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, } /** + * irdma_manage_arp_cache - manage hw arp cache + * @rf: RDMA PCI function + * @mac_addr: mac address ptr + * @ip_addr: ip addr for arp cache + * @action: add, delete or modify + */ +void 
+irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, + u32 *ip_addr, u32 action) +{ + int arp_index; + + arp_index = irdma_arp_table(rf, ip_addr, mac_addr, action); + if (arp_index == -1) + return; + + irdma_arp_cqp_op(rf, (u16)arp_index, mac_addr, action); +} + +/** * irdma_send_syn_cqp_callback - do syn/ack after qhash * @cqp_request: qhash cqp completion */ @@ -2577,7 +2609,7 @@ irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request) struct irdma_cm_node *cm_node = cqp_request->param; irdma_send_syn(cm_node, 1); - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); } /** @@ -2598,7 +2630,6 @@ irdma_qhash_info_prepare(struct irdma_device *iwdev, struct irdma_qhash_table_info *info; info = &cqp_info->in.u.manage_qhash_table_entry.info; - memset(info, 0, sizeof(*info)); info->vsi = &iwdev->vsi; info->manage = mtype; info->entry_type = etype; @@ -2731,7 +2762,7 @@ irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, cqp_request->callback_fcn = irdma_send_syn_cqp_callback; cqp_request->param = cmnode; if (!wait) - atomic_inc(&cm_node->refcnt); + irdma_add_ref_cmnode(cm_node); } if (info->ipv4_valid) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, @@ -2759,7 +2790,7 @@ irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); if (status && cm_node && !wait) - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); irdma_put_cqp_request(iwcqp, cqp_request); @@ -2873,6 +2904,9 @@ irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ)) return; + if (atomic_cmpxchg(&iwqp->flush_issued, 0, 1)) + return; + /* Set flush info fields */ info.sq = flush_mask & IRDMA_FLUSH_SQ; info.rq = flush_mask & IRDMA_FLUSH_RQ; @@ -2896,7 +2930,9 @@ irdma_flush_wqes(struct irdma_qp *iwqp, u32 
flush_mask) if (info.rq && iwqp->sc_qp.rq_flush_code) info.rq_minor_code = flush_code; } - if (irdma_upload_context && irdma_upload_qp_context(iwqp, 0, 1)) + if (irdma_upload_context && + irdma_upload_qp_context(rf, iwqp->sc_qp.qp_uk.qp_id, + iwqp->sc_qp.qp_uk.qp_type, 0, 1)) irdma_dev_warn(&iwqp->iwdev->ibdev, "failed to upload QP context\n"); if (!iwqp->user_mode) irdma_sched_qp_flush_work(iwqp); @@ -2905,5 +2941,4 @@ irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) /* Issue flush */ (void)irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &info, flush_mask & IRDMA_FLUSH_WAIT); - iwqp->flush_issued = true; } diff --git a/sys/dev/irdma/irdma_kcompat.c b/sys/dev/irdma/irdma_kcompat.c index 4261fb45d390..51f44133252c 100644 --- a/sys/dev/irdma/irdma_kcompat.c +++ b/sys/dev/irdma/irdma_kcompat.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2018 - 2023 Intel Corporation + * Copyright (c) 2018 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -470,25 +470,49 @@ irdma_create_ah_wait(struct irdma_pci_f *rf, int ret; if (!sleep) { - int cnt = rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms * - CQP_TIMEOUT_THRESHOLD; + bool timeout = false; + u64 start = get_jiffies_64(); + u64 completed_ops = atomic64_read(&rf->sc_dev.cqp->completed_ops); struct irdma_cqp_request *cqp_request = sc_ah->ah_info.cqp_request; + const u64 timeout_jiffies = + msecs_to_jiffies(rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms * + CQP_TIMEOUT_THRESHOLD); + + /* + * NOTE: irdma_check_cqp_progress is not used here because it relies on a notion of a cycle count, but + * we want to avoid unnecessary delays. We are in an atomic context here, so we might as well check in + * a tight loop. 
+ */ + while (!READ_ONCE(cqp_request->request_done)) { + u64 tmp; + u64 curr_jiffies; - do { irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); - mdelay(1); - } while (!READ_ONCE(cqp_request->request_done) && --cnt); - if (cnt && !cqp_request->compl_info.op_ret_val) { + curr_jiffies = get_jiffies_64(); + tmp = atomic64_read(&rf->sc_dev.cqp->completed_ops); + if (tmp != completed_ops) { + /* CQP is progressing. Reset timer. */ + completed_ops = tmp; + start = curr_jiffies; + } + + if ((curr_jiffies - start) > timeout_jiffies) { + timeout = true; + break; + } + } + + if (!timeout && !cqp_request->compl_info.op_ret_val) { irdma_put_cqp_request(&rf->cqp, cqp_request); sc_ah->ah_info.ah_valid = true; } else { - ret = !cnt ? -ETIMEDOUT : -EINVAL; + ret = timeout ? -ETIMEDOUT : -EINVAL; irdma_dev_err(&rf->iwdev->ibdev, "CQP create AH error ret = %d opt_ret_val = %d", ret, cqp_request->compl_info.op_ret_val); irdma_put_cqp_request(&rf->cqp, cqp_request); - if (!cnt && !rf->reset) { + if (timeout && !rf->reset) { rf->reset = true; rf->gen_ops.request_reset(rf); } @@ -501,19 +525,10 @@ irdma_create_ah_wait(struct irdma_pci_f *rf, #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) -/** - * irdma_create_ah - create address handle - * @ib_ah: ptr to AH - * @attr: address handle attributes - * @flags: AH flags to wait - * @udata: user data - * - * returns 0 on success, error otherwise - */ -int -irdma_create_ah(struct ib_ah *ib_ah, - struct ib_ah_attr *attr, u32 flags, - struct ib_udata *udata) +static int +irdma_create_sleepable_ah(struct ib_ah *ib_ah, + struct ib_ah_attr *attr, u32 flags, + struct ib_udata *udata) { struct irdma_pd *pd = to_iwpd(ib_ah->pd); struct irdma_ah *ah = container_of(ib_ah, struct irdma_ah, ibah); @@ -613,6 +628,23 @@ err_gid_l2: return err; } +/** + * irdma_create_ah - create address handle + * @ib_ah: ptr to AH + * @attr: address handle attributes + * @flags: AH flags to wait + * @udata: user data + * + * returns 0 on success, 
error otherwise + */ +int +irdma_create_ah(struct ib_ah *ib_ah, + struct ib_ah_attr *attr, u32 flags, + struct ib_udata *udata) +{ + return irdma_create_sleepable_ah(ib_ah, attr, flags, udata); +} + void irdma_ether_copy(u8 *dmac, struct ib_ah_attr *attr) { @@ -691,6 +723,7 @@ irdma_create_qp(struct ib_pd *ibpd, struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {{0}}; struct irdma_qp_host_ctx_info *ctx_info; + u32 next_qp = 0; unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); @@ -743,6 +776,9 @@ irdma_create_qp(struct ib_pd *ibpd, if (init_attr->qp_type == IB_QPT_GSI) qp_num = 1; + else if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) + err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp, + &qp_num, &next_qp); else err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp, &qp_num, &rf->next_qp); @@ -759,7 +795,7 @@ irdma_create_qp(struct ib_pd *ibpd, iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; init_info.pd = &iwpd->sc_pd; - init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; + init_info.qp_uk_init_info.qp_id = qp_num; if (!rdma_protocol_roce(&iwdev->ibdev, 1)) init_info.qp_uk_init_info.first_sq_wq = 1; iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; @@ -769,10 +805,11 @@ irdma_create_qp(struct ib_pd *ibpd, spin_lock_init(&iwqp->dwork_flush_lock); if (udata) { + INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_user_flush_worker); init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, init_attr); } else { - INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); + INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_kern_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); } @@ -946,7 +983,6 @@ irdma_create_cq(struct ib_cq *ibcq, unsigned long flags; int err_code; int entries = attr->cqe; - bool cqe_64byte_ena; err_code = 
cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) @@ -966,10 +1002,9 @@ irdma_create_cq(struct ib_cq *ibcq, INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); info.dev = dev; - ukinfo->cq_size = max(entries, 4); + ukinfo->cq_size = max_t(int, entries, 4); ukinfo->cq_id = cq_num; - cqe_64byte_ena = (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE) ? true : false; - ukinfo->avoid_mem_cflct = cqe_64byte_ena; + iwcq->cq_num = cq_num; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; atomic_set(&iwcq->armed, 0); if (attr->comp_vector < rf->ceqs_count) @@ -1004,8 +1039,6 @@ irdma_create_cq(struct ib_cq *ibcq, err_code = -EPROTO; goto cq_free_rsrc; } - iwcq->iwpbl = iwpbl; - iwcq->cq_mem_size = 0; cqmr = &iwpbl->cq_mr; if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags & @@ -1019,7 +1052,6 @@ irdma_create_cq(struct ib_cq *ibcq, err_code = -EPROTO; goto cq_free_rsrc; } - iwcq->iwpbl_shadow = iwpbl_shadow; cqmr_shadow = &iwpbl_shadow->cq_mr; info.shadow_area_pa = cqmr_shadow->cq_pbl.addr; cqmr->split = true; @@ -1043,14 +1075,11 @@ irdma_create_cq(struct ib_cq *ibcq, } entries++; - if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) entries *= 2; ukinfo->cq_size = entries; - if (cqe_64byte_ena) - rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe); - else - rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); + rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); iwcq->kmem.size = round_up(rsize, IRDMA_HW_PAGE_SIZE); iwcq->kmem.va = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem, iwcq->kmem.size, IRDMA_HW_PAGE_SIZE); @@ -1094,6 +1123,7 @@ irdma_create_cq(struct ib_cq *ibcq, cqp_info->in.u.cq_create.cq = cq; cqp_info->in.u.cq_create.check_overflow = true; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(rf, cqp_request); 
irdma_put_cqp_request(&rf->cqp, cqp_request); if (status) { @@ -1114,7 +1144,7 @@ irdma_create_cq(struct ib_cq *ibcq, } } - rf->cq_table[cq_num] = iwcq; + WRITE_ONCE(rf->cq_table[cq_num], iwcq); init_completion(&iwcq->free_cq); return 0; @@ -1227,6 +1257,86 @@ done: return 0; } +/** + * irdma_reg_user_mr - Register a user memory region + * @pd: ptr of pd + * @start: virtual start address + * @len: length of mr + * @virt: virtual address + * @access: access of mr + * @udata: user data + */ +struct ib_mr * +irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, + u64 virt, int access, + struct ib_udata *udata) +{ +#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages) + struct irdma_device *iwdev = to_iwdev(pd->device); + struct irdma_mem_reg_req req = {}; + struct ib_umem *region; + struct irdma_mr *iwmr; + int err; + + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) + return ERR_PTR(-EINVAL); + + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) + return ERR_PTR(-EINVAL); + + region = ib_umem_get(pd->uobject->context, start, len, access, 0); + + if (IS_ERR(region)) { + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, + "Failed to create ib_umem region err=%ld\n", + PTR_ERR(region)); + return (struct ib_mr *)region; + } + + if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) { + ib_umem_release(region); + return ERR_PTR(-EFAULT); + } + + iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type); + if (IS_ERR(iwmr)) { + ib_umem_release(region); + return (struct ib_mr *)iwmr; + } + + switch (req.reg_type) { + case IRDMA_MEMREG_TYPE_QP: + err = irdma_reg_user_mr_type_qp(req, udata, iwmr); + if (err) + goto error; + + break; + case IRDMA_MEMREG_TYPE_CQ: + err = irdma_reg_user_mr_type_cq(req, udata, iwmr); + if (err) + goto error; + + break; + case IRDMA_MEMREG_TYPE_MEM: + err = irdma_reg_user_mr_type_mem(iwmr, access, true); + if (err) + goto error; + + break; + default: + err = -EINVAL; + goto error; + } + + return 
&iwmr->ibmr; + +error: + ib_umem_release(region); + irdma_free_iwmr(iwmr); + + return ERR_PTR(err); +} + /* * irdma_rereg_user_mr - Re-Register a user memory region @ibmr: ib mem to access iwarp mr pointer @flags: bit mask to * indicate which of the attr's of MR modified @start: virtual start address @len: length of mr @virt: virtual address @@ -1512,20 +1622,19 @@ irdma_query_port(struct ib_device *ibdev, u8 port, /* no need to zero out pros here. done by caller */ props->max_mtu = IB_MTU_4096; - props->active_mtu = ib_mtu_int_to_enum(if_getmtu(netdev)); + props->active_mtu = min(props->max_mtu, iboe_get_mtu(if_getmtu(netdev))); props->lid = 1; props->lmc = 0; props->sm_lid = 0; props->sm_sl = 0; - if ((if_getlinkstate(netdev) == LINK_STATE_UP) && - (if_getdrvflags(netdev) & IFF_DRV_RUNNING)) { + if ((if_getlinkstate(netdev) == LINK_STATE_UP) && (if_getdrvflags(netdev) & IFF_DRV_RUNNING)) { props->state = IB_PORT_ACTIVE; props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } - ib_get_eth_speed(ibdev, port, &props->active_speed, &props->active_width); + ib_get_eth_speed(ibdev, port, (u16 *)& props->active_speed, &props->active_width); if (rdma_protocol_roce(ibdev, 1)) { props->gid_tbl_len = 32; @@ -1720,36 +1829,104 @@ kc_set_rdma_uverbs_cmd_mask(struct irdma_device *iwdev) iwdev->ibdev.uverbs_ex_cmd_mask |= BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ); } -int -ib_get_eth_speed(struct ib_device *ibdev, u32 port_num, u8 *speed, u8 *width) +static void +ib_get_width_and_speed(u32 netdev_speed, u32 lanes, + u16 *speed, u8 *width) { - if_t netdev = ibdev->get_netdev(ibdev, port_num); - u32 netdev_speed; + if (!lanes) { + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } 
else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_50000) { + *width = IB_WIDTH_2X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_100000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_200000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_HDR; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_NDR; + } - if (!netdev) - return -ENODEV; + return; + } - netdev_speed = if_getbaudrate(netdev); - dev_put(netdev); - if (netdev_speed <= SPEED_1000) { + switch (lanes) { + case 1: *width = IB_WIDTH_1X; - *speed = IB_SPEED_SDR; - } else if (netdev_speed <= SPEED_10000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_FDR10; - } else if (netdev_speed <= SPEED_20000) { + break; + case 2: + *width = IB_WIDTH_2X; + break; + case 4: *width = IB_WIDTH_4X; - *speed = IB_SPEED_DDR; - } else if (netdev_speed <= SPEED_25000) { + break; + case 8: + *width = IB_WIDTH_8X; + break; + case 12: + *width = IB_WIDTH_12X; + break; + default: *width = IB_WIDTH_1X; - *speed = IB_SPEED_EDR; - } else if (netdev_speed <= SPEED_40000) { - *width = IB_WIDTH_4X; + } + + switch (netdev_speed / lanes) { + case SPEED_2500: + *speed = IB_SPEED_SDR; + break; + case SPEED_5000: + *speed = IB_SPEED_DDR; + break; + case SPEED_10000: *speed = IB_SPEED_FDR10; - } else { - *width = IB_WIDTH_4X; + break; + case SPEED_14000: + *speed = IB_SPEED_FDR; + break; + case SPEED_25000: *speed = IB_SPEED_EDR; + break; + case SPEED_50000: + *speed = IB_SPEED_HDR; + break; + case SPEED_100000: + *speed = IB_SPEED_NDR; + break; + default: + *speed = IB_SPEED_SDR; } +} + +int +ib_get_eth_speed(struct ib_device *ibdev, u32 port_num, u16 *speed, u8 *width) +{ + if_t netdev = ibdev->get_netdev(ibdev, port_num); + u32 netdev_speed, lanes; + + if (!netdev) + return -ENODEV; + + netdev_speed = 
(u32)if_getbaudrate(netdev); + dev_put(netdev); + lanes = 0; + + ib_get_width_and_speed(netdev_speed, lanes, speed, width); return 0; } diff --git a/sys/dev/irdma/irdma_main.h b/sys/dev/irdma/irdma_main.h index 9181f3b70463..1dc455532819 100644 --- a/sys/dev/irdma/irdma_main.h +++ b/sys/dev/irdma/irdma_main.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2025 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -74,16 +74,17 @@ extern bool irdma_upload_context; #define IRDMA_FW_VER_DEFAULT 2 #define IRDMA_HW_VER 2 -#define IRDMA_ARP_ADD 1 +#define IRDMA_ARP_ADD_UPDATE 1 +#define IRDMA_ARP_ADD IRDMA_ARP_ADD_UPDATE #define IRDMA_ARP_DELETE 2 #define IRDMA_ARP_RESOLVE 3 #define IRDMA_MACIP_ADD 1 #define IRDMA_MACIP_DELETE 2 -#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 1) -#define IW_CEQ_SIZE 2048 -#define IW_AEQ_SIZE 2048 +#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_MAX + 2) +#define IW_CEQ_SIZE 2048 +#define IW_AEQ_SIZE 2048 #define RX_BUF_SIZE (1536 + 8) #define IW_REG0_SIZE (4 * 1024) @@ -96,6 +97,7 @@ extern bool irdma_upload_context; #define IRDMA_EVENT_TIMEOUT_MS 5000 #define IRDMA_VCHNL_EVENT_TIMEOUT_MS 10000 +#define IRDMA_RETRY_PRINT_MS 5000 #define IRDMA_RST_TIMEOUT_HZ 4 #define IRDMA_NO_QSET 0xffff @@ -107,7 +109,6 @@ extern bool irdma_upload_context; #define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3 #define IRDMA_Q_TYPE_PE_AEQ 0x80 -#define IRDMA_Q_INVALID_IDX 0xffff #define IRDMA_REM_ENDPOINT_TRK_QPID 3 #define IRDMA_DRV_OPT_ENA_MPA_VER_0 0x00000001 @@ -228,6 +229,8 @@ struct irdma_aeq { struct irdma_arp_entry { u32 ip_addr[4]; u8 mac_addr[ETHER_ADDR_LEN]; + atomic_t refcnt; + bool delete_pending:1; }; struct irdma_msix_vector { @@ -314,6 +317,7 @@ struct irdma_pci_f { u32 next_ws_node_id; u32 max_ws_node_id; u32 limits_sel; + u8 timer_slots; unsigned long 
*allocated_ws_nodes; unsigned long *allocated_qps; unsigned long *allocated_cqs; @@ -351,8 +355,6 @@ struct irdma_pci_f { struct msix_entry msix_info; struct irdma_dma_mem obj_mem; struct irdma_dma_mem obj_next; - atomic_t vchnl_msgs; - wait_queue_head_t vchnl_waitq; struct workqueue_struct *cqp_cmpl_wq; struct work_struct cqp_cmpl_work; struct irdma_sc_vsi default_vsi; @@ -361,6 +363,15 @@ struct irdma_pci_f { void (*check_fc)(struct irdma_sc_vsi *vsi, struct irdma_sc_qp *sc_qp); struct irdma_dcqcn_cc_params dcqcn_params; struct irdma_device *iwdev; + struct delayed_work dwork_cqp_poll; + u32 chk_stag; +}; + +struct irdma_ae_info { + spinlock_t info_lock; + atomic_t ae_cnt; + u32 retry_cnt; + unsigned long retry_delay; }; struct irdma_device { @@ -372,6 +383,7 @@ struct irdma_device { struct workqueue_struct *cleanup_wq; struct irdma_sc_vsi vsi; struct irdma_cm_core cm_core; + struct irdma_ae_info ae_info; u32 roce_cwnd; u32 roce_ackcreds; u32 vendor_id; @@ -395,7 +407,6 @@ struct irdma_device { bool dcb_vlan_mode:1; bool iw_ooo:1; enum init_completion_state init_state; - wait_queue_head_t suspend_wq; }; @@ -522,6 +533,8 @@ void irdma_qp_rem_ref(struct ib_qp *ibqp); void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp); struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn); void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask); +void irdma_arp_cqp_op(struct irdma_pci_f *rf, u16 arp_index, + const unsigned char *mac_addr, u32 action); void irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, u32 *ip_addr, u32 action); struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port); @@ -555,6 +568,8 @@ void irdma_cq_add_ref(struct ib_cq *ibcq); void irdma_cq_rem_ref(struct ib_cq *ibcq); void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); +void irdma_chk_free_stag(struct irdma_pci_f *rf); +void cqp_poll_worker(struct work_struct *work); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); 
int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_modify_qp_info *info, bool wait); @@ -565,7 +580,7 @@ int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, bool wait); int irdma_add_qhash_wait_no_lock(struct irdma_device *iwdev, struct irdma_cm_info *cminfo); void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf); -void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp); +void irdma_cm_ilq_cmpl_handler(struct irdma_sc_vsi *vsi, void *bufp); void irdma_free_qp_rsrc(struct irdma_qp *iwqp); int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core); @@ -586,8 +601,9 @@ u16 irdma_get_vlan_ipv4(struct iw_cm_id *cm_id, u32 *addr); void irdma_get_vlan_mac_ipv6(struct iw_cm_id *cm_id, u32 *addr, u16 *vlan_id, u8 *mac); struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, - int acc, u64 *iova_start); -int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw); + int acc, u64 *iova_start, bool dma_mr); +int irdma_upload_qp_context(struct irdma_pci_f *rf, u32 qpn, + u8 qp_type, bool freeze, bool raw); void irdma_del_hmc_objects(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, bool privileged, bool reset, enum irdma_vers vers); @@ -597,7 +613,6 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, void (*callback_fcn)(struct irdma_cqp_request *cqp_request), void *cb_param); void irdma_udqp_qs_worker(struct work_struct *work); -bool irdma_cq_empty(struct irdma_cq *iwcq); int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr); void irdma_unregister_notifiers(struct irdma_device *iwdev); diff --git a/sys/dev/irdma/irdma_pble.c b/sys/dev/irdma/irdma_pble.c index aaf9d8917622..9d6d89b3f881 100644 --- a/sys/dev/irdma/irdma_pble.c +++ b/sys/dev/irdma/irdma_pble.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or 
Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -108,7 +108,7 @@ static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) { - idx->sd_idx = (u32)pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; + idx->sd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE); idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE); idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD); } @@ -545,12 +545,14 @@ void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc) { - pble_rsrc->freedpbles += palloc->total_cnt; - if (palloc->level == PBLE_LEVEL_2) free_lvl2(pble_rsrc, palloc); else irdma_prm_return_pbles(&pble_rsrc->pinfo, &palloc->level1.chunkinfo); + + mutex_lock(&pble_rsrc->pble_mutex_lock); + pble_rsrc->freedpbles += palloc->total_cnt; pble_rsrc->stats_alloc_freed++; + mutex_unlock(&pble_rsrc->pble_mutex_lock); } diff --git a/sys/dev/irdma/irdma_protos.h b/sys/dev/irdma/irdma_protos.h index 0663f9591d52..09a35e5a1f97 100644 --- a/sys/dev/irdma/irdma_protos.h +++ b/sys/dev/irdma/irdma_protos.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2016 - 2023 Intel Corporation + * Copyright (c) 2016 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -52,6 +52,7 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, bool post_sq); void irdma_init_config_check(struct irdma_config_check *cc, u8 traffic_class, + u8 prio, u16 qs_handle); /* HMC/FPM functions */ int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u16 hmc_fn_id); @@ -64,8 +65,6 @@ int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, u8 op); int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, u8 op); -int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, - struct irdma_stats_inst_info *stats_info); void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats, struct irdma_gather_stats *gather_stats, struct irdma_gather_stats *last_gather_stats, @@ -115,7 +114,7 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev); void free_sd_mem(struct irdma_sc_dev *dev); int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, struct cqp_cmds_info *pcmdinfo); -int irdma_process_bh(struct irdma_sc_dev *dev); +void irdma_process_bh(struct irdma_sc_dev *dev); extern void dump_ctx(struct irdma_sc_dev *dev, u32 pf_num, u32 qp_num); void dumpCSR(struct irdma_sc_dev *dev); void dumpCSRx(struct irdma_sc_dev *dev); diff --git a/sys/dev/irdma/irdma_puda.c b/sys/dev/irdma/irdma_puda.c index 0c5b9c164d76..5dc978259685 100644 --- a/sys/dev/irdma/irdma_puda.c +++ b/sys/dev/irdma/irdma_puda.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -435,6 +435,7 @@ irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, /* reusing so synch the buffer for CPU use */ dma_sync_single_for_cpu(hw_to_dev(dev->hw), buf->mem.pa, buf->mem.size, DMA_BIDIRECTIONAL); IRDMA_RING_SET_TAIL(qp->sq_ring, info.wqe_idx); + buf->queued = false; rsrc->xmit_complete(rsrc->vsi, buf); spin_lock_irqsave(&rsrc->bufpool_lock, flags); rsrc->tx_wqe_avail_cnt++; @@ -536,7 +537,7 @@ irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info) * @rsrc: resource to use for buffer * @buf: puda buffer to transmit */ -void +int irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf) { @@ -545,17 +546,28 @@ irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, unsigned long flags; spin_lock_irqsave(&rsrc->bufpool_lock, flags); + if (buf) { + if (buf->queued) { + irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, + "PUDA: Attempting to re-send queued buf %p\n", + buf); + spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); + return -EINVAL; + } + + buf->queued = true; + } /* * if no wqe available or not from a completion and we have pending buffers, we must queue new buffer */ if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) { list_add_tail(&buf->list, &rsrc->txpend); - spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); rsrc->stats_sent_pkt_q++; + spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, "adding to txpend\n"); - return; + return 0; } rsrc->tx_wqe_avail_cnt--; /* @@ -595,6 +607,7 @@ irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, } done: spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); + return 0; } /** @@ -737,11 +750,14 @@ irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) irdma_qp_add_qos(qp); irdma_puda_qp_setctx(rsrc); + qp->qp_state = IRDMA_QP_STATE_RTS; + if (rsrc->dev->ceq_valid) ret = irdma_cqp_qp_create_cmd(rsrc->dev, qp); 
else ret = irdma_puda_qp_wqe(rsrc->dev, qp); if (ret) { + qp->qp_state = IRDMA_QP_STATE_INVALID; irdma_qp_rem_qos(qp); rsrc->dev->ws_remove(qp->vsi, qp->user_pri); irdma_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem); @@ -964,6 +980,7 @@ irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, irdma_free_hash_desc(rsrc->hash_desc); /* fallthrough */ case PUDA_QP_CREATED: + rsrc->qp.qp_state = IRDMA_QP_STATE_INVALID; irdma_qp_rem_qos(&rsrc->qp); if (!reset) diff --git a/sys/dev/irdma/irdma_puda.h b/sys/dev/irdma/irdma_puda.h index aff435a90ecd..73e5c42f3c09 100644 --- a/sys/dev/irdma/irdma_puda.h +++ b/sys/dev/irdma/irdma_puda.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -94,20 +94,22 @@ struct irdma_puda_buf { u8 *iph; u8 *tcph; u8 *data; + u32 seqnum; + u32 ah_id; + u32 totallen; /* machlen+iphlen+tcphlen+datalen */ u16 datalen; u16 vlan_id; u8 tcphlen; /* tcp length in bytes */ u8 maclen; /* mac length in bytes */ - u32 totallen; /* machlen+iphlen+tcphlen+datalen */ - atomic_t refcount; + atomic_t pb_refcount; u8 hdrlen; bool virtdma:1; bool ipv4:1; bool vlan_valid:1; bool do_lpb:1; /* Loopback buffer */ bool smac_valid:1; - u32 seqnum; - u32 ah_id; + bool queued:1; + struct irdma_sc_ah *ah; u8 smac[ETHER_ADDR_LEN]; struct irdma_sc_vsi *vsi; }; @@ -184,7 +186,7 @@ struct irdma_puda_rsrc { struct irdma_puda_buf *irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc); void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); -void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, +int irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info); int 
irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, diff --git a/sys/dev/irdma/irdma_type.h b/sys/dev/irdma/irdma_type.h index 011245f37779..77db328f940b 100644 --- a/sys/dev/irdma/irdma_type.h +++ b/sys/dev/irdma/irdma_type.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -279,6 +279,7 @@ struct irdma_cqp_init_info { u8 hmc_profile; u8 ena_vf_count; u8 ceqs_per_vf; + u8 timer_slots; bool en_datacenter_tcp:1; bool disable_packed:1; bool rocev2_rto_policy:1; @@ -414,6 +415,7 @@ struct irdma_sc_cqp { u8 ena_vf_count; u8 timeout_count; u8 ceqs_per_vf; + u8 timer_slots; bool en_datacenter_tcp:1; bool disable_packed:1; bool rocev2_rto_policy:1; @@ -442,8 +444,8 @@ struct irdma_sc_ceq { struct irdma_sc_dev *dev; struct irdma_ceqe *ceqe_base; void *pbl_list; - u32 ceq_id; u32 elem_cnt; + u16 ceq_id; struct irdma_ring ceq_ring; u8 pbl_chunk_size; u8 tph_val; @@ -466,8 +468,8 @@ struct irdma_sc_cq { struct irdma_sc_vsi *vsi; void *pbl_list; void *back_cq; - u32 ceq_id; u32 shadow_read_threshold; + u16 ceq_id; u8 pbl_chunk_size; u8 cq_type; u8 tph_val; @@ -505,6 +507,7 @@ struct irdma_sc_qp { u8 hw_sq_size; u8 hw_rq_size; u8 src_mac_addr_idx; + bool suspended:1; bool on_qoslist:1; bool ieq_pass_thru:1; bool sq_tph_en:1; @@ -523,12 +526,6 @@ struct irdma_sc_qp { struct list_head list; }; -struct irdma_stats_inst_info { - u16 hmc_fn_id; - u16 stats_idx; - bool use_hmc_fcn_index:1; -}; - struct irdma_up_info { u8 map[8]; u8 cnp_up_override; @@ -540,6 +537,13 @@ struct irdma_up_info { #define IRDMA_MAX_WS_NODES 0x3FF #define IRDMA_WS_NODE_INVALID 0xFFFF +struct irdma_ws_move_node_info { + u16 node_id[16]; + u8 num_nodes; + u8 target_port; + bool resume_traffic:1; +}; + struct irdma_ws_node_info { u16 id; u16 vsi; @@ -582,6 
+586,7 @@ struct irdma_config_check { bool lfc_set:1; bool pfc_set:1; u8 traffic_class; + u8 prio; u16 qs_handle; }; @@ -599,7 +604,6 @@ struct irdma_sc_vsi { u32 exception_lan_q; u16 mtu; enum irdma_vm_vf_type vm_vf_type; - bool stats_inst_alloc:1; bool tc_change_pending:1; bool mtu_change_pending:1; struct irdma_vsi_pestat *pestat; @@ -653,7 +657,6 @@ struct irdma_sc_dev { u16 num_vfs; u16 hmc_fn_id; bool ceq_valid:1; - u8 pci_rev; int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_reset)(struct irdma_sc_vsi *vsi); @@ -773,7 +776,7 @@ struct irdma_ceq_init_info { u64 *ceqe_base; void *pbl_list; u32 elem_cnt; - u32 ceq_id; + u16 ceq_id; bool virtual_map:1; bool tph_en:1; bool itr_no_expire:1; @@ -804,8 +807,8 @@ struct irdma_ccq_init_info { __le64 *shadow_area; void *pbl_list; u32 num_elem; - u32 ceq_id; u32 shadow_read_threshold; + u16 ceq_id; bool ceqe_mask:1; bool ceq_id_valid:1; bool avoid_mem_cflct:1; @@ -1004,7 +1007,6 @@ struct irdma_allocate_stag_info { u16 access_rights; bool remote_access:1; bool use_hmc_fcn_index:1; - bool use_pf_rid:1; bool all_memory:1; u16 hmc_fcn_index; }; @@ -1032,7 +1034,6 @@ struct irdma_reg_ns_stag_info { irdma_stag_key stag_key; bool use_hmc_fcn_index:1; u16 hmc_fcn_index; - bool use_pf_rid:1; bool all_memory:1; }; @@ -1056,7 +1057,6 @@ struct irdma_fast_reg_stag_info { bool push_wqe:1; bool use_hmc_fcn_index:1; u16 hmc_fcn_index; - bool use_pf_rid:1; bool defer_flag:1; }; @@ -1065,6 +1065,7 @@ struct irdma_dealloc_stag_info { u32 pd_id; bool mr:1; bool dealloc_pbl:1; + bool skip_flush_markers:1; }; struct irdma_register_shared_stag { @@ -1102,8 +1103,8 @@ struct irdma_cq_init_info { struct irdma_sc_dev *dev; u64 cq_base_pa; u64 shadow_area_pa; - u32 ceq_id; u32 shadow_read_threshold; + u16 ceq_id; u8 pbl_chunk_size; u32 first_pm_pbl_idx; bool virtual_map:1; @@ -1204,7 +1205,7 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, int 
irdma_sc_ccq_init(struct irdma_sc_cq *ccq, struct irdma_ccq_init_info *info); -int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch); +int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq); int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq); int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq); @@ -1473,12 +1474,6 @@ struct cqp_info { struct { struct irdma_sc_cqp *cqp; - struct irdma_stats_inst_info info; - u64 scratch; - } stats_manage; - - struct { - struct irdma_sc_cqp *cqp; struct irdma_stats_gather_info info; u64 scratch; } stats_gather; @@ -1491,6 +1486,12 @@ struct cqp_info { struct { struct irdma_sc_cqp *cqp; + struct irdma_ws_move_node_info info; + u64 scratch; + } ws_move_node; + + struct { + struct irdma_sc_cqp *cqp; struct irdma_up_info info; u64 scratch; } up_map; @@ -1508,6 +1509,8 @@ struct cqp_cmds_info { u8 cqp_cmd; u8 post_sq; struct cqp_info in; + int cqp_cmd_exec_status; + bool create; }; __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch, diff --git a/sys/dev/irdma/irdma_uda.h b/sys/dev/irdma/irdma_uda.h index 9850f986ee67..d21b811844af 100644 --- a/sys/dev/irdma/irdma_uda.h +++ b/sys/dev/irdma/irdma_uda.h @@ -44,6 +44,7 @@ struct irdma_sc_cqp; struct irdma_ah_info { struct irdma_sc_vsi *vsi; struct irdma_cqp_request *cqp_request; + atomic_t ah_refcnt; u32 pd_idx; u32 dst_arpindex; u32 dest_ip_addr[4]; @@ -63,6 +64,7 @@ struct irdma_ah_info { struct irdma_sc_ah { struct irdma_sc_dev *dev; struct irdma_ah_info ah_info; + struct work_struct ah_free_work; }; int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, diff --git a/sys/dev/irdma/irdma_uda_d.h b/sys/dev/irdma/irdma_uda_d.h index bbf66bd8680d..1907abcecf12 100644 --- a/sys/dev/irdma/irdma_uda_d.h +++ b/sys/dev/irdma/irdma_uda_d.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2016 - 2021 Intel Corporation + * Copyright (c) 2016 - 2026 Intel Corporation * * This software is 
available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -183,6 +183,8 @@ #define IRDMA_UDA_CQPSQ_MAV_WQEVALID BIT_ULL(63) #define IRDMA_UDA_CQPSQ_MAV_OPCODE_S 32 #define IRDMA_UDA_CQPSQ_MAV_OPCODE GENMASK_ULL(37, 32) +#define IRDMA_UDA_CQPSQ_MAV_TYPE_S 53 +#define IRDMA_UDA_CQPSQ_MAV_TYPE GENMASK_ULL(54, 53) #define IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK_S 62 #define IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK BIT_ULL(62) #define IRDMA_UDA_CQPSQ_MAV_IPV4VALID_S 59 diff --git a/sys/dev/irdma/irdma_uk.c b/sys/dev/irdma/irdma_uk.c index 6c2e2dfb0031..cbe80fc59722 100644 --- a/sys/dev/irdma/irdma_uk.c +++ b/sys/dev/irdma/irdma_uk.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -133,16 +133,18 @@ irdma_nop_1(struct irdma_qp_uk *qp) void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx) { - __le64 *wqe; + struct irdma_qp_quanta *sq; u32 wqe_idx; if (!(qp_wqe_idx & 0x7F)) { wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; - wqe = qp->sq_base[wqe_idx].elem; + sq = qp->sq_base + wqe_idx; if (wqe_idx) - memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); + memset(sq, qp->swqe_polarity ? 0 : 0xFF, + 128 * sizeof(*sq)); else - memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000); + memset(sq, qp->swqe_polarity ? 
0xFF : 0, + 128 * sizeof(*sq)); } } @@ -200,22 +202,65 @@ irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) qp->push_dropped = false; } +/** + * irdma_qp_push_wqe - setup push wqe and ring db + * @qp: hw qp ptr + * @wqe: wqe ptr + * @quanta: numbers of quanta in wqe + * @wqe_idx: wqe index + * @push_wqe: if to use push for the wqe + */ void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 * wqe, u16 quanta, - u32 wqe_idx, bool post_sq) + u32 wqe_idx, bool push_wqe) { __le64 *push; - if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != - IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && - !qp->push_mode) { - irdma_uk_qp_post_wr(qp); - } else { + if (push_wqe) { push = (__le64 *) ((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); irdma_memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); irdma_qp_ring_push_db(qp, wqe_idx); + qp->last_push_db = true; + } else if (qp->last_push_db) { + qp->last_push_db = false; + db_wr32(qp->qp_id, qp->wqe_alloc_db); + } else { + irdma_uk_qp_post_wr(qp); + } +} + +/** + * irdma_push_ring_free - check if sq ring free to pust push wqe + * @qp: hw qp ptr + */ +static inline bool +irdma_push_ring_free(struct irdma_qp_uk *qp) +{ + u32 head, tail; + + head = IRDMA_RING_CURRENT_HEAD(qp->initial_ring); + tail = IRDMA_RING_CURRENT_TAIL(qp->sq_ring); + + if (head == tail || head == (tail + 1)) + return true; + + return false; +} + +/** + * irdma_enable_push_wqe - depending on sq ring and total size + * @qp: hw qp ptr + * @total_size: total data size + */ +static inline bool +irdma_enable_push_wqe(struct irdma_qp_uk *qp, u32 total_size) +{ + if (irdma_push_ring_free(qp) && + total_size <= qp->uk_attrs->max_hw_push_len) { + return true; } + return false; } /** @@ -234,7 +279,8 @@ irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, __le64 *wqe; __le64 *wqe_0 = NULL; u32 nop_wqe_idx; - u16 avail_quanta, wqe_quanta = *quanta; + u16 wqe_quanta = *quanta; + u16 avail_quanta; u16 i; avail_quanta = qp->uk_attrs->max_hw_sq_chunk - @@ -330,7 +376,7 
@@ irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = false; op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) @@ -350,11 +396,13 @@ irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; + if (qp->push_db) + info->push_wqe = irdma_enable_push_wqe(qp, total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; - qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); @@ -399,8 +447,8 @@ irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -429,7 +477,7 @@ irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, u16 quanta; u64 hdr; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe &= qp->push_db ? true : false; op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) @@ -451,7 +499,6 @@ irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, qp->ord_cnt = 0; } - qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; addl_frag_cnt = op_info->num_lo_sges > 1 ? 
(op_info->num_lo_sges - 1) : 0; local_fence |= info->local_fence; @@ -490,8 +537,8 @@ irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -517,7 +564,7 @@ irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; + info->push_wqe = false; op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) @@ -534,6 +581,9 @@ irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, if (ret_code) return ret_code; + if (qp->push_db) + info->push_wqe = irdma_enable_push_wqe(qp, total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; @@ -587,8 +637,8 @@ irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -748,11 +798,11 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; - qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; read_fence |= info->read_fence; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); @@ -780,8 +830,8 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); 
- if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -854,8 +904,8 @@ irdma_uk_inline_send(struct irdma_qp_uk *qp, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -905,8 +955,8 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - if (info->push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); + if (qp->push_db) + irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, info->push_wqe); else if (post_sq) irdma_uk_qp_post_wr(qp); @@ -1065,29 +1115,6 @@ irdma_check_rq_cqe(struct irdma_qp_uk *qp, u32 *array_idx) } /** - * irdma_skip_duplicate_flush_cmpl - check last cmpl and update wqe if needed - * - * @ring: sq/rq ring - * @flush_seen: information if flush for specific ring was already seen - * @comp_status: completion status - * @wqe_idx: new value of WQE index returned if there is more work on ring - */ -static inline int -irdma_skip_duplicate_flush_cmpl(struct irdma_ring ring, u8 flush_seen, - enum irdma_cmpl_status comp_status, - u32 *wqe_idx) -{ - if (flush_seen) { - if (IRDMA_RING_MORE_WORK(ring)) - *wqe_idx = ring.tail; - else - return -ENOENT; - } - - return 0; -} - -/** * irdma_detect_unsignaled_cmpls - check if unsignaled cmpl is to be reported * @cq: hw cq * @qp: hw qp @@ -1140,6 +1167,28 @@ irdma_detect_unsignaled_cmpls(struct irdma_cq_uk *cq, } /** + * irdma_uk_cq_empty - Check if CQ is empty + * @cq: hw cq + */ +bool +irdma_uk_cq_empty(struct irdma_cq_uk *cq) +{ + __le64 *cqe; + u8 polarity; + u64 qword3; + + if (cq->avoid_mem_cflct) + cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); + else + cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); + + get_64bit_val(cqe, 24, &qword3); + 
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + + return polarity != cq->polarity; +} + +/** * irdma_uk_cq_poll_cmpl - get cq completion info * @cq: hw cq * @info: cq poll information returned @@ -1158,6 +1207,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, u8 polarity; bool ext_valid; __le64 *ext_cqe; + unsigned long flags; if (cq->avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); @@ -1229,6 +1279,10 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); qp = (struct irdma_qp_uk *)(irdma_uintptr) comp_ctx; + if (!qp || qp->destroy_pending) { + ret_code = -EFAULT; + goto exit; + } if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); @@ -1253,15 +1307,10 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, get_64bit_val(cqe, IRDMA_BYTE_0, &qword0); get_64bit_val(cqe, IRDMA_BYTE_16, &qword2); - info->stat.raw = (u32)FIELD_GET(IRDMACQ_TCPSQN_ROCEPSN_RTT_TS, qword0); info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2); info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); - if (!qp || qp->destroy_pending) { - ret_code = -EFAULT; - goto exit; - } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); info->qp_handle = (irdma_qp_handle) (irdma_uintptr) qp; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); @@ -1269,51 +1318,42 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, if (info->q_type == IRDMA_CQE_QTYPE_RQ) { u32 array_idx; - ret_code = irdma_skip_duplicate_flush_cmpl(qp->rq_ring, - qp->rq_flush_seen, - info->comp_status, - &wqe_idx); - if (ret_code != 0) - goto exit; - array_idx = wqe_idx / qp->rq_wqe_size_multiplier; + info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); + info->signaled = 1; + + if (qword3 & IRDMACQ_STAG) { + info->stag_invalid_set = true; + info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, 
qword2); + } else { + info->stag_invalid_set = false; + } if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { + spin_lock_irqsave(qp->lock, flags); if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { ret_code = -ENOENT; + spin_unlock_irqrestore(qp->lock, flags); goto exit; } info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail]; - info->signaled = 1; - array_idx = qp->rq_ring.tail; + IRDMA_RING_SET_TAIL(qp->rq_ring, qp->rq_ring.tail + 1); + if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) + qp->rq_flush_complete = true; + else + move_cq_head = false; + spin_unlock_irqrestore(qp->lock, flags); } else { info->wr_id = qp->rq_wrid_array[array_idx]; - info->signaled = 1; if (irdma_check_rq_cqe(qp, &array_idx)) { info->wr_id = qp->rq_wrid_array[array_idx]; info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); return 0; } - } - - info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); - - if (qword3 & IRDMACQ_STAG) { - info->stag_invalid_set = true; - info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); - } else { - info->stag_invalid_set = false; - } - IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); - if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { - qp->rq_flush_seen = true; - if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) - qp->rq_flush_complete = true; - else - move_cq_head = false; + IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); } pring = &qp->rq_ring; } else { /* q_type is IRDMA_CQE_QTYPE_SQ */ @@ -1335,12 +1375,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, qp->push_mode = false; qp->push_dropped = true; } - ret_code = irdma_skip_duplicate_flush_cmpl(qp->sq_ring, - qp->sq_flush_seen, - info->comp_status, - &wqe_idx); - if (ret_code != 0) - goto exit; if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; info->signaled = qp->sq_wrtrk_array[wqe_idx].signaled; @@ -1353,8 +1387,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, 
IRDMA_RING_SET_TAIL(qp->sq_ring, wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); } else { - unsigned long flags; - spin_lock_irqsave(qp->lock, flags); if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { spin_unlock_irqrestore(qp->lock, flags); @@ -1386,7 +1418,6 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, if (info->op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) info->minor_err = FLUSH_MW_BIND_ERR; - qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) qp->sq_flush_complete = true; spin_unlock_irqrestore(qp->lock, flags); @@ -1416,8 +1447,9 @@ exit: IRDMA_RING_MOVE_TAIL(cq->cq_ring); if (!cq->avoid_mem_cflct && ext_valid) IRDMA_RING_MOVE_TAIL(cq->cq_ring); - set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, - IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); + if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) & 0x3F || irdma_uk_cq_empty(cq)) + set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, + IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); } else { qword3 &= ~IRDMA_CQ_WQEIDX; qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail); @@ -1482,15 +1514,16 @@ irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; - - *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)sq_size << shift) + IRDMA_SQ_RSVD); - if (*sqdepth < min_size) - *sqdepth = min_size; - else if (*sqdepth > uk_attrs->max_hw_wq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_wq_quanta) return -EINVAL; + *sqdepth = hw_quanta; return 0; } @@ -1501,15 +1534,16 @@ irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *s int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { - u32 min_size = (u32)uk_attrs->min_hw_wq_size << 
shift; - - *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); + u32 min_hw_quanta = (u32)uk_attrs->min_hw_wq_size << shift; + u64 hw_quanta = + irdma_round_up_wq(((u64)rq_size << shift) + IRDMA_RQ_RSVD); - if (*rqdepth < min_size) - *rqdepth = min_size; - else if (*rqdepth > uk_attrs->max_hw_rq_quanta) + if (hw_quanta < min_hw_quanta) + hw_quanta = min_hw_quanta; + else if (hw_quanta > uk_attrs->max_hw_rq_quanta) return -EINVAL; + *rqdepth = hw_quanta; return 0; } diff --git a/sys/dev/irdma/irdma_user.h b/sys/dev/irdma/irdma_user.h index 05828ebbd7d6..71b08d9d486d 100644 --- a/sys/dev/irdma/irdma_user.h +++ b/sys/dev/irdma/irdma_user.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -184,12 +184,10 @@ enum irdma_device_caps_const { IRDMA_MIN_IW_QP_ID = 0, IRDMA_QUERY_FPM_BUF_SIZE = 176, IRDMA_COMMIT_FPM_BUF_SIZE = 176, - IRDMA_MAX_IW_QP_ID = 262143, IRDMA_MIN_CEQID = 0, IRDMA_MAX_CEQID = 1023, IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1, IRDMA_MIN_CQID = 0, - IRDMA_MAX_CQID = 524287, IRDMA_MIN_AEQ_ENTRIES = 1, IRDMA_MAX_AEQ_ENTRIES = 524287, IRDMA_MIN_CEQ_ENTRIES = 1, @@ -284,7 +282,7 @@ struct irdma_cq_uk_init_info; struct irdma_ring { volatile u32 head; - volatile u32 tail; /* effective tail */ + volatile u32 tail; u32 size; }; @@ -385,12 +383,6 @@ struct irdma_cq_poll_info { bool ud_smac_valid:1; bool imm_valid:1; bool signaled:1; - union { - u32 tcp_sqn; - u32 roce_psn; - u32 rtt; - u32 raw; - } stat; }; struct qp_err_code { @@ -426,6 +418,7 @@ struct irdma_wqe_uk_ops { struct irdma_bind_window *op_info); }; +bool irdma_uk_cq_empty(struct irdma_cq_uk *cq); int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info); void irdma_uk_cq_request_notification(struct 
irdma_cq_uk *cq, @@ -465,6 +458,8 @@ struct irdma_qp_uk { __le64 *shadow_area; __le32 *push_db; __le64 *push_wqe; + void *push_db_map; + void *push_wqe_map; struct irdma_ring sq_ring; struct irdma_ring sq_sig_ring; struct irdma_ring rq_ring; @@ -494,12 +489,11 @@ struct irdma_qp_uk { bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ + bool last_push_db:1; /* Indicates last DB was push DB */ void *back_qp; spinlock_t *lock; u8 dbg_rq_flushed; u16 ord_cnt; - u8 sq_flush_seen; - u8 rq_flush_seen; u8 rd_fence_rate; }; @@ -563,10 +557,12 @@ int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift); -int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth); -int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth); +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, + u8 shift, u32 *sqdepth); +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, + u8 shift, u32 *rqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, - u32 wqe_idx, bool post_sq); + u32 wqe_idx, bool push_wqe); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id) diff --git a/sys/dev/irdma/irdma_utils.c b/sys/dev/irdma/irdma_utils.c index 038f1980082b..ef8cb38d8139 100644 --- a/sys/dev/irdma/irdma_utils.c +++ b/sys/dev/irdma/irdma_utils.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice 
of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,6 +37,7 @@ LIST_HEAD(irdma_handlers); DEFINE_SPINLOCK(irdma_handler_lock); +static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS]; static const struct ae_desc ae_desc_list[] = { {IRDMA_AE_AMP_UNALLOCATED_STAG, "Unallocated memory key (L-Key/R-Key)"}, {IRDMA_AE_AMP_INVALID_STAG, "Invalid memory key (L-Key/R-Key)"}, @@ -206,7 +207,7 @@ irdma_get_ae_desc(u16 ae_id) * @rf: RDMA PCI function * @ip_addr: ip address for device * @mac_addr: mac address ptr - * @action: modify, delete or add + * @action: modify, delete or add/update */ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, @@ -220,22 +221,22 @@ irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, spin_lock_irqsave(&rf->arp_lock, flags); for (arp_index = 0; (u32)arp_index < rf->arp_table_size; arp_index++) { - if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip))) + if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip)) && + !rf->arp_table[arp_index].delete_pending) break; } switch (action) { - case IRDMA_ARP_ADD: - if (arp_index != rf->arp_table_size) { - arp_index = -1; - break; - } - - arp_index = 0; - if (irdma_alloc_rsrc(rf, rf->allocated_arps, rf->arp_table_size, - (u32 *)&arp_index, &rf->next_arp_index)) { - arp_index = -1; - break; + case IRDMA_ARP_ADD_UPDATE: /* ARP Add or Update */ + if (arp_index == rf->arp_table_size) { + if (irdma_alloc_rsrc(rf, rf->allocated_arps, + rf->arp_table_size, + (u32 *)&arp_index, + &rf->next_arp_index)) { + arp_index = -1; + break; + } + atomic_set(&rf->arp_table[arp_index].refcnt, 0); } memcpy(rf->arp_table[arp_index].ip_addr, ip, @@ -252,10 +253,16 @@ irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, break; } - memset(rf->arp_table[arp_index].ip_addr, 0, - sizeof(rf->arp_table[arp_index].ip_addr)); - eth_zero_addr(rf->arp_table[arp_index].mac_addr); - irdma_free_rsrc(rf, 
rf->allocated_arps, arp_index); + if (!atomic_read(&rf->arp_table[arp_index].refcnt)) { + memset(rf->arp_table[arp_index].ip_addr, 0, + sizeof(rf->arp_table[arp_index].ip_addr)); + eth_zero_addr(rf->arp_table[arp_index].mac_addr); + irdma_free_rsrc(rf, rf->allocated_arps, arp_index); + rf->arp_table[arp_index].delete_pending = false; + } else { + rf->arp_table[arp_index].delete_pending = true; + arp_index = -1; /* prevent immediate CQP ARP index deletion */ + } break; default: arp_index = -1; @@ -266,8 +273,61 @@ irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, return arp_index; } +static int +irdma_get_arp(struct irdma_pci_f *rf, u16 arp_index) +{ + unsigned long flags; + u32 ip_zero[4] = {}; + + if (arp_index >= rf->arp_table_size) + return -EINVAL; + + spin_lock_irqsave(&rf->arp_lock, flags); + if (!memcmp(rf->arp_table[arp_index].ip_addr, ip_zero, sizeof(ip_zero))) { + spin_unlock_irqrestore(&rf->arp_lock, flags); + return -EINVAL; + } + if (!atomic_read(&rf->arp_table[arp_index].refcnt)) + atomic_set(&rf->arp_table[arp_index].refcnt, 1); + else + atomic_inc(&rf->arp_table[arp_index].refcnt); + spin_unlock_irqrestore(&rf->arp_lock, flags); + + return 0; +} + +static void +irdma_put_arp(struct irdma_pci_f *rf, u16 arp_index) +{ + unsigned long flags; + + if (arp_index >= rf->arp_table_size) + return; + spin_lock_irqsave(&rf->arp_lock, flags); + if (!atomic_dec_and_test(&rf->arp_table[arp_index].refcnt)) { + spin_unlock_irqrestore(&rf->arp_lock, flags); + return; + } + + if (rf->arp_table[arp_index].delete_pending) { + u32 ip_addr[4]; + + memcpy(ip_addr, rf->arp_table[arp_index].ip_addr, + sizeof(ip_addr)); + memset(rf->arp_table[arp_index].ip_addr, 0, + sizeof(rf->arp_table[arp_index].ip_addr)); + eth_zero_addr(rf->arp_table[arp_index].mac_addr); + spin_unlock_irqrestore(&rf->arp_lock, flags); + irdma_arp_cqp_op(rf, arp_index, NULL, IRDMA_ARP_DELETE); + rf->arp_table[arp_index].delete_pending = false; + irdma_free_rsrc(rf, 
rf->allocated_arps, arp_index); + } else { + spin_unlock_irqrestore(&rf->arp_lock, flags); + } +} + /** - * irdma_add_arp - add a new arp entry if needed + * irdma_add_arp - add a new arp entry if needed and resolve it * @rf: RDMA function * @ip: IP address * @mac: MAC address @@ -275,18 +335,7 @@ irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, const u8 *mac) { - int arpidx; - - arpidx = irdma_arp_table(rf, &ip[0], NULL, IRDMA_ARP_RESOLVE); - if (arpidx >= 0) { - if (ether_addr_equal(rf->arp_table[arpidx].mac_addr, mac)) - return arpidx; - - irdma_manage_arp_cache(rf, rf->arp_table[arpidx].mac_addr, ip, - IRDMA_ARP_DELETE); - } - - irdma_manage_arp_cache(rf, mac, ip, IRDMA_ARP_ADD); + irdma_manage_arp_cache(rf, mac, ip, IRDMA_ARP_ADD_UPDATE); return irdma_arp_table(rf, ip, NULL, IRDMA_ARP_RESOLVE); } @@ -378,6 +427,8 @@ irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp, atomic_set(&cqp_request->refcnt, 1); memset(&cqp_request->compl_info, 0, sizeof(cqp_request->compl_info)); + memset(&cqp_request->info, 0, sizeof(cqp_request->info)); + return cqp_request; } @@ -503,6 +554,17 @@ irdma_wait_event(struct irdma_pci_f *rf, READ_ONCE(cqp_request->request_done), msecs_to_jiffies(wait_time_ms))) break; + if (cqp_request->info.cqp_cmd_exec_status) { + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_CQP, + "%s (%d) cqp op error status reported: %d, %d %x %x\n", + irdma_cqp_cmd_names[cqp_request->info.cqp_cmd], + cqp_request->info.cqp_cmd, + cqp_request->info.cqp_cmd_exec_status, + cqp_request->compl_info.error, + cqp_request->compl_info.maj_err_code, + cqp_request->compl_info.min_err_code); + break; + } irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev); @@ -540,7 +602,7 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = { [IRDMA_OP_DELETE_ARP_CACHE_ENTRY] = "Delete ARP Cache Cmd", [IRDMA_OP_MANAGE_APBVT_ENTRY] = "Manage APBV Table Entry Cmd", [IRDMA_OP_CEQ_CREATE] = "CEQ Create Cmd", - 
[IRDMA_OP_AEQ_CREATE] = "AEQ Destroy Cmd", + [IRDMA_OP_AEQ_CREATE] = "AEQ Create Cmd", [IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY] = "Manage Quad Hash Table Entry Cmd", [IRDMA_OP_QP_MODIFY] = "Modify QP Cmd", [IRDMA_OP_QP_UPLOAD_CONTEXT] = "Upload Context Cmd", @@ -635,7 +697,7 @@ irdma_handle_cqp_op(struct irdma_pci_f *rf, bool put_cqp_request = true; if (rf->reset) - return 0; + return (info->create ? -EBUSY : 0); irdma_get_cqp_request(cqp_request); status = irdma_process_cqp_cmd(dev, info); @@ -654,13 +716,23 @@ irdma_handle_cqp_op(struct irdma_pci_f *rf, err: if (irdma_cqp_crit_err(dev, info->cqp_cmd, cqp_request->compl_info.maj_err_code, - cqp_request->compl_info.min_err_code)) + cqp_request->compl_info.min_err_code)) { + int qpn = -1; + + if (info->cqp_cmd == IRDMA_OP_QP_CREATE) + qpn = cqp_request->info.in.u.qp_create.qp->qp_uk.qp_id; + else if (info->cqp_cmd == IRDMA_OP_QP_MODIFY) + qpn = cqp_request->info.in.u.qp_modify.qp->qp_uk.qp_id; + else if (info->cqp_cmd == IRDMA_OP_QP_DESTROY) + qpn = cqp_request->info.in.u.qp_destroy.qp->qp_uk.qp_id; + irdma_dev_err(&rf->iwdev->ibdev, - "[%s Error][op_code=%d] status=%d waiting=%d completion_err=%d maj=0x%x min=0x%x\n", - irdma_cqp_cmd_names[info->cqp_cmd], info->cqp_cmd, status, + "[%s Error] status=%d waiting=%d completion_err=%d maj=0x%x min=0x%x qpn=%d\n", + irdma_cqp_cmd_names[info->cqp_cmd], status, cqp_request->waiting, cqp_request->compl_info.error, cqp_request->compl_info.maj_err_code, - cqp_request->compl_info.min_err_code); + cqp_request->compl_info.min_err_code, qpn); + } if (put_cqp_request) irdma_put_cqp_request(&rf->cqp, cqp_request); @@ -715,7 +787,7 @@ irdma_cq_rem_ref(struct ib_cq *ibcq) return; } - rf->cq_table[iwcq->cq_num] = NULL; + WRITE_ONCE(rf->cq_table[iwcq->cq_num], NULL); spin_unlock_irqrestore(&rf->cqtable_lock, flags); complete(&iwcq->free_cq); } @@ -1009,6 +1081,7 @@ irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) cqp_info->post_sq = 1; cqp_info->in.u.cq_create.cq = 
cq; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); @@ -1037,13 +1110,13 @@ irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; - memset(qp_info, 0, sizeof(*qp_info)); qp_info->cq_num_valid = true; qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS; cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE; cqp_info->post_sq = 1; cqp_info->in.u.qp_create.qp = qp; cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); @@ -1060,10 +1133,10 @@ void irdma_dealloc_push_page(struct irdma_pci_f *rf, struct irdma_qp *iwqp) { + struct irdma_sc_qp *qp = &iwqp->sc_qp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; - struct irdma_sc_qp *qp = &iwqp->sc_qp; if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) return; @@ -1081,6 +1154,7 @@ irdma_dealloc_push_page(struct irdma_pci_f *rf, cqp_info->in.u.manage_push_page.info.push_page_type = 0; cqp_info->in.u.manage_push_page.cqp = &rf->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; + status = irdma_handle_cqp_op(rf, cqp_request); if (!status) qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; @@ -1160,6 +1234,7 @@ irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, cqp_info->post_sq = 1; cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp; cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request; + cqp_info->create = false; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (status) { @@ -1241,7 +1316,6 @@ irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) return -ENOMEM; cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = 
IRDMA_OP_QP_DESTROY; cqp_info->post_sq = 1; cqp_info->in.u.qp_destroy.qp = qp; @@ -1306,7 +1380,7 @@ irdma_ieq_get_qp(struct irdma_sc_dev *dev, return NULL; iwqp = cm_node->iwqp; - irdma_rem_ref_cm_node(cm_node); + irdma_rem_ref_cmnode(cm_node); return &iwqp->sc_qp; } @@ -1598,7 +1672,6 @@ irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, return -ENOMEM; cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER; cqp_info->post_sq = 1; cqp_info->in.u.stats_gather.info = pestat->gather_info; @@ -1616,44 +1689,6 @@ irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, } /** - * irdma_cqp_stats_inst_cmd - Allocate/free stats instance - * @vsi: pointer to vsi structure - * @cmd: command to allocate or free - * @stats_info: pointer to allocate stats info - */ -int -irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, - struct irdma_stats_inst_info *stats_info) -{ - struct irdma_pci_f *rf = dev_to_rf(vsi->dev); - struct irdma_cqp *iwcqp = &rf->cqp; - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - int status; - bool wait = false; - - if (cmd == IRDMA_OP_STATS_ALLOCATE) - wait = true; - cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); - if (!cqp_request) - return -ENOMEM; - - cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); - cqp_info->cqp_cmd = cmd; - cqp_info->post_sq = 1; - cqp_info->in.u.stats_manage.info = *stats_info; - cqp_info->in.u.stats_manage.scratch = (uintptr_t)cqp_request; - cqp_info->in.u.stats_manage.cqp = &rf->cqp.sc_cqp; - status = irdma_handle_cqp_op(rf, cqp_request); - if (wait) - stats_info->stats_idx = cqp_request->compl_info.op_ret_val; - irdma_put_cqp_request(iwcqp, cqp_request); - - return status; -} - -/** * irdma_cqp_ceq_cmd - Create/Destroy CEQ's after CEQ 0 * @dev: pointer to device info * @sc_ceq: pointer to ceq structure @@ -1743,12 +1778,12 @@ irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, return -ENOMEM; 
cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = cmd; cqp_info->post_sq = 1; cqp_info->in.u.ws_node.info = *node_info; cqp_info->in.u.ws_node.cqp = cqp; cqp_info->in.u.ws_node.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(rf, cqp_request); if (status) goto exit; @@ -1773,7 +1808,7 @@ exit: } /** - * irdma_ah_cqp_op - perform an AH cqp operation + * irdma_ah_do_cqp - perform an AH cqp operation * @rf: RDMA PCI function * @sc_ah: address handle * @cmd: AH operation @@ -1783,8 +1818,8 @@ exit: * * returns errno */ -int -irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, +static int +irdma_ah_do_cqp(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, bool wait, void (*callback_fcn) (struct irdma_cqp_request *), void *cb_param) @@ -1811,6 +1846,7 @@ irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, cqp_info->in.u.ah_create.info = sc_ah->ah_info; cqp_info->in.u.ah_create.scratch = (uintptr_t)cqp_request; cqp_info->in.u.ah_create.cqp = &rf->cqp.sc_cqp; + cqp_info->create = true; } else if (cmd == IRDMA_OP_AH_DESTROY) { cqp_info->in.u.ah_destroy.info = sc_ah->ah_info; cqp_info->in.u.ah_destroy.scratch = (uintptr_t)cqp_request; @@ -1833,6 +1869,36 @@ irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, return 0; } +int +irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, + bool wait, + void (*callback_fcn) (struct irdma_cqp_request *), + void *cb_param) +{ + int status; + + if (cmd == IRDMA_OP_AH_CREATE) { + status = irdma_get_arp(rf, sc_ah->ah_info.dst_arpindex); + if (status) { + irdma_dev_err(&rf->iwdev->ibdev, "%s get_arp failed for index = %d\n", + __func__, sc_ah->ah_info.dst_arpindex); + + return -EINVAL; + } + status = irdma_ah_do_cqp(rf, sc_ah, cmd, wait, callback_fcn, + cb_param); + if (status) + irdma_put_arp(rf, sc_ah->ah_info.dst_arpindex); + } else { + status = 
irdma_ah_do_cqp(rf, sc_ah, cmd, wait, callback_fcn, + cb_param); + if (cmd == IRDMA_OP_AH_DESTROY) + irdma_put_arp(rf, sc_ah->ah_info.dst_arpindex); + } + + return status; +} + /** * irdma_ieq_ah_cb - callback after creation of AH for IEQ * @cqp_request: pointer to cqp_request of create AH @@ -1853,6 +1919,9 @@ irdma_ieq_ah_cb(struct irdma_cqp_request *cqp_request) irdma_ieq_cleanup_qp(qp->vsi->ieq, qp); } spin_unlock_irqrestore(&qp->pfpdu.lock, flags); + if (!cqp_request->waiting) + irdma_put_cqp_request(sc_ah->dev->cqp->back_cqp, + cqp_request); } /** @@ -1867,6 +1936,9 @@ irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request) sc_ah->ah_info.ah_valid = !cqp_request->compl_info.op_ret_val; irdma_add_conn_est_qh(cm_node); + if (!cqp_request->waiting) + irdma_put_cqp_request(sc_ah->dev->cqp->back_cqp, + cqp_request); } /** @@ -2239,39 +2311,35 @@ clear_qp_ctx_addr(__le64 * ctx) /** * irdma_upload_qp_context - upload raw QP context - * @iwqp: QP pointer + * @rf: RDMA PCI function + * @qpn: QP ID + * @qp_type: QP Type * @freeze: freeze QP * @raw: raw context flag */ int -irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw) +irdma_upload_qp_context(struct irdma_pci_f *rf, u32 qpn, + u8 qp_type, bool freeze, bool raw) { struct irdma_dma_mem dma_mem; struct irdma_sc_dev *dev; - struct irdma_sc_qp *qp; struct irdma_cqp *iwcqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_upload_context_info *info; - struct irdma_pci_f *rf; int ret; u32 *ctx; - rf = iwqp->iwdev->rf; - if (!rf) - return -EINVAL; - - qp = &iwqp->sc_qp; dev = &rf->sc_dev; iwcqp = &rf->cqp; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); - if (!cqp_request) + if (!cqp_request) { + irdma_debug((dev), IRDMA_DEBUG_QP, "Could not get CQP req for QP [%u]\n", qpn); return -EINVAL; - + } cqp_info = &cqp_request->info; info = &cqp_info->in.u.qp_upload_context.info; - memset(info, 0, sizeof(struct irdma_upload_context_info)); cqp_info->cqp_cmd = 
IRDMA_OP_QP_UPLOAD_CONTEXT; cqp_info->post_sq = 1; cqp_info->in.u.qp_upload_context.dev = dev; @@ -2281,6 +2349,7 @@ irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw) dma_mem.va = irdma_allocate_dma_mem(dev->hw, &dma_mem, dma_mem.size, PAGE_SIZE); if (!dma_mem.va) { irdma_put_cqp_request(&rf->cqp, cqp_request); + irdma_debug((dev), IRDMA_DEBUG_QP, "Could not allocate buffer for QP [%u]\n", qpn); return -ENOMEM; } @@ -2288,20 +2357,21 @@ irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw) info->buf_pa = dma_mem.pa; info->raw_format = raw; info->freeze_qp = freeze; - info->qp_type = qp->qp_uk.qp_type; /* 1 is iWARP and 2 UDA */ - info->qp_id = qp->qp_uk.qp_id; + info->qp_type = qp_type; /* 1 is iWARP and 2 UDA */ + info->qp_id = qpn; ret = irdma_handle_cqp_op(rf, cqp_request); if (ret) goto error; - irdma_debug(dev, IRDMA_DEBUG_QP, "PRINT CONTXT QP [%d]\n", info->qp_id); + irdma_debug((dev), IRDMA_DEBUG_QP, "PRINT CONTXT QP [%u]\n", info->qp_id); { u32 i, j; clear_qp_ctx_addr(dma_mem.va); for (i = 0, j = 0; i < 32; i++, j += 4) - irdma_debug(dev, IRDMA_DEBUG_QP, - "%d:\t [%08X %08x %08X %08X]\n", (j * 4), - ctx[j], ctx[j + 1], ctx[j + 2], ctx[j + 3]); + irdma_debug((dev), IRDMA_DEBUG_QP, + "[%u] %u:\t [%08X %08x %08X %08X]\n", + info->qp_id, (j * 4), ctx[j], ctx[j + 1], + ctx[j + 2], ctx[j + 3]); } error: irdma_put_cqp_request(iwcqp, cqp_request); @@ -2310,20 +2380,41 @@ error: return ret; } -bool -irdma_cq_empty(struct irdma_cq *iwcq) +static bool +qp_has_unpolled_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq) { - struct irdma_cq_uk *ukcq; - u64 qword3; + struct irdma_cq_uk *cq = &iwcq->sc_cq.cq_uk; + struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk; + u32 cq_head = IRDMA_RING_CURRENT_HEAD(cq->cq_ring); + u64 qword3, comp_ctx; __le64 *cqe; - u8 polarity; + u8 polarity, cq_polarity; + + cq_polarity = cq->polarity; + do { + if (cq->avoid_mem_cflct) + cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[cq_head].buf; + else + cqe = 
cq->cq_base[cq_head].buf; + get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + + if (polarity != cq_polarity) + break; + + /* Ensure CQE contents are read after valid bit is checked */ + rmb(); + + get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); + if ((struct irdma_qp_uk *)(irdma_uintptr) comp_ctx == qp) + return true; - ukcq = &iwcq->sc_cq.cq_uk; - cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); - get_64bit_val(cqe, 24, &qword3); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + cq_head = (cq_head + 1) % cq->cq_ring.size; + if (!cq_head) + cq_polarity ^= 1; + } while (true); - return polarity != ukcq->polarity; + return false; } void @@ -2380,11 +2471,18 @@ irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, static inline void irdma_comp_handler(struct irdma_cq *cq) { + struct irdma_device *iwdev = to_iwdev(cq->ibcq.device); + struct irdma_ceq *ceq = &iwdev->rf->ceqlist[cq->sc_cq.ceq_id]; + unsigned long flags; + if (!cq->ibcq.comp_handler) return; - if (atomic_cmpxchg(&cq->armed, 1, 0)) + if (atomic_read(&cq->armed)) { + spin_lock_irqsave(&ceq->ce_lock, flags); cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + spin_unlock_irqrestore(&ceq->ce_lock, flags); + } } /** @@ -2405,7 +2503,7 @@ irdma_generate_flush_completions(struct irdma_qp *iwqp) unsigned long flags1; spin_lock_irqsave(&iwqp->iwscq->lock, flags1); - if (irdma_cq_empty(iwqp->iwscq)) { + if (!qp_has_unpolled_cqes(iwqp, iwqp->iwscq)) { unsigned long flags2; spin_lock_irqsave(&iwqp->lock, flags2); @@ -2452,7 +2550,7 @@ irdma_generate_flush_completions(struct irdma_qp *iwqp) } spin_lock_irqsave(&iwqp->iwrcq->lock, flags1); - if (irdma_cq_empty(iwqp->iwrcq)) { + if (!qp_has_unpolled_cqes(iwqp, iwqp->iwrcq)) { unsigned long flags2; spin_lock_irqsave(&iwqp->lock, flags2); @@ -2527,3 +2625,49 @@ irdma_udqp_qs_worker(struct work_struct *work) irdma_qp_rem_ref(&udqs_work->iwqp->ibqp); kfree(udqs_work); } + +void +irdma_chk_free_stag(struct irdma_pci_f *rf) +{ 
+ struct irdma_cqp_request *cqp_request; + struct cqp_cmds_info *cqp_info; + struct irdma_dealloc_stag_info *info; + + cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); + if (!cqp_request) + return; + + cqp_info = &cqp_request->info; + info = &cqp_info->in.u.dealloc_stag.info; + info->stag_idx = RS_64_1(rf->chk_stag, IRDMA_CQPSQ_STAG_IDX_S); + cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; + cqp_info->post_sq = 1; + cqp_info->in.u.dealloc_stag.dev = &rf->sc_dev; + cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; + irdma_handle_cqp_op(rf, cqp_request); + irdma_put_cqp_request(&rf->cqp, cqp_request); +} + +void +cqp_poll_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct irdma_pci_f *rf = container_of(dwork, struct irdma_pci_f, dwork_cqp_poll); + struct irdma_mr iwmr = {}; + struct irdma_pd *iwpd; + + iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL); + if (!iwpd) + return; + iwmr.stag = rf->chk_stag; + iwmr.ibmw.type = IB_MW_TYPE_1; + iwmr.ibmr.pd = &iwpd->ibpd; + if (irdma_hw_alloc_mw(rf->iwdev, &iwmr)) + goto exit; + irdma_chk_free_stag(rf); + + mod_delayed_work(rf->iwdev->cleanup_wq, &rf->dwork_cqp_poll, + msecs_to_jiffies(3000)); +exit: + kfree(iwpd); +} diff --git a/sys/dev/irdma/irdma_verbs.c b/sys/dev/irdma/irdma_verbs.c index 59d4bf392562..a131286d1d37 100644 --- a/sys/dev/irdma/irdma_verbs.c +++ b/sys/dev/irdma/irdma_verbs.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -60,7 +60,6 @@ irdma_query_device(struct ib_device *ibdev, irdma_fw_minor_ver(&rf->sc_dev); props->device_cap_flags = IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS; - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; props->vendor_id = pcidev->vendor; props->vendor_part_id = pcidev->device; props->hw_ver = pcidev->revision; @@ -229,6 +228,7 @@ irdma_alloc_push_page(struct irdma_qp *iwqp) cqp_info->in.u.manage_push_page.info.push_page_type = 0; cqp_info->in.u.manage_push_page.cqp = &iwdev->rf->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); if (!status && cqp_request->compl_info.op_ret_val < @@ -273,9 +273,20 @@ irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq) { struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk; unsigned long flags; + struct irdma_cmpl_gen *cmpl_node; + struct list_head *tmp_node, *list_node; spin_lock_irqsave(&iwcq->lock, flags); irdma_uk_clean_cq(&iwqp->sc_qp.qp_uk, ukcq); + + list_for_each_safe(list_node, tmp_node, &iwcq->cmpl_generated) { + cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list); + if (cmpl_node->cpi.qp_id == iwqp->ibqp.qp_num) { + list_del(&cmpl_node->list); + kfree(cmpl_node); + } + } + spin_unlock_irqrestore(&iwcq->lock, flags); } @@ -390,8 +401,7 @@ irdma_setup_umode_qp(struct ib_udata *udata, ret = ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)); if (ret) { - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "ib_copy_from_data fail\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "ib_copy_from_data fail\n"); return ret; } @@ -406,8 +416,7 @@ irdma_setup_umode_qp(struct ib_udata *udata, if (!iwqp->iwpbl) { ret = -ENODATA; - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "no pbl info\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "no pbl info\n"); return ret; } } @@ -524,7 +533,6 @@ 
irdma_setup_kmode_qp(struct irdma_device *iwdev, info->shadow_area_pa = info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; - ukinfo->qp_id = iwqp->ibqp.qp_num; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; @@ -549,7 +557,6 @@ irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; - memset(qp_info, 0, sizeof(*qp_info)); qp_info->mac_valid = true; qp_info->cq_num_valid = true; qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE; @@ -558,6 +565,7 @@ irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) cqp_info->post_sq = 1; cqp_info->in.u.qp_create.qp = &iwqp->sc_qp; cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); @@ -574,7 +582,7 @@ irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_udp_offload_info *udp_info; udp_info = &iwqp->udp_info; - udp_info->snd_mss = ib_mtu_enum_to_int(ib_mtu_int_to_enum(iwdev->vsi.mtu)); + udp_info->snd_mss = ib_mtu_enum_to_int(iboe_get_mtu(iwdev->vsi.mtu)); udp_info->cwnd = iwdev->roce_cwnd; udp_info->rexmit_thresh = 2; udp_info->rnr_nak_thresh = 2; @@ -680,7 +688,26 @@ irdma_sched_qp_flush_work(struct irdma_qp *iwqp) } void -irdma_flush_worker(struct work_struct *work) +irdma_user_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, + dwork_flush); + + /* + * Set the WAIT flag to prevent a massive buildup of flush commands in the extreme case of many QPs lingering + * in the ERROR state. 
+ */ + irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_REFLUSH | + IRDMA_FLUSH_WAIT); + + /* Re-arm continuously. Work is canceled when QP is deleted. */ + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_PERIODIC_FLUSH_MS)); +} + +void +irdma_kern_flush_worker(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush); @@ -974,8 +1001,7 @@ irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_STATE) { if (!ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state, iwqp->ibqp.qp_type, attr_mask)) { - irdma_dev_warn(&iwdev->ibdev, - "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n", + irdma_dev_warn(&iwdev->ibdev, "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n", iwqp->ibqp.qp_num, iwqp->ibqp_state, attr->qp_state); ret = -EINVAL; @@ -1022,8 +1048,7 @@ irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, udp_info->cwnd = iwdev->roce_cwnd; roce_info->ack_credits = iwdev->roce_ackcreds; if (iwdev->push_mode && udata && - iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX && - dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { + iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_alloc_push_page(iwqp); spin_lock_irqsave(&iwqp->lock, flags); @@ -1090,14 +1115,14 @@ irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; iwqp->ibqp_state = attr->qp_state; + iwqp->sc_qp.qp_state = iwqp->iwarp_state; } if (iwqp->ibqp_state > IB_QPS_RTS && - !iwqp->flush_issued) { + !atomic_read(&iwqp->flush_issued)) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); - iwqp->flush_issued = 1; } else { spin_unlock_irqrestore(&iwqp->lock, flags); 
@@ -1198,8 +1223,7 @@ irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, issue_modify_qp = 1; } if (iwdev->push_mode && udata && - iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX && - dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { + iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_alloc_push_page(iwqp); spin_lock_irqsave(&iwqp->lock, flags); @@ -1318,6 +1342,7 @@ irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; iwqp->ibqp_state = attr->qp_state; + iwqp->sc_qp.qp_state = iwqp->iwarp_state; } spin_unlock_irqrestore(&iwqp->lock, flags); } @@ -1337,7 +1362,7 @@ irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); if (iwqp->cm_node) { - atomic_inc(&iwqp->cm_node->refcnt); + irdma_add_ref_cmnode(iwqp->cm_node); spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); close_timer_started = atomic_inc_return(&iwqp->close_timer_started); if (iwqp->cm_id && close_timer_started == 1) @@ -1345,7 +1370,7 @@ irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, (struct irdma_puda_buf *)iwqp, IRDMA_TIMER_TYPE_CLOSE, 1, 0); - irdma_rem_ref_cm_node(iwqp->cm_node); + irdma_rem_ref_cmnode(iwqp->cm_node); } else { spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); } @@ -1482,11 +1507,11 @@ irdma_resize_cq(struct ib_cq *ibcq, int entries, if (!iwcq->user_mode) { entries++; - if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) entries *= 2; } - info.cq_size = max(entries, 4); + info.cq_size = max_t(int, entries, 4); if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1) return 0; @@ -1557,6 +1582,7 @@ irdma_resize_cq(struct ib_cq *ibcq, int entries, cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq; cqp_info->in.u.cq_modify.scratch = 
(uintptr_t)cqp_request; cqp_info->post_sq = 1; + cqp_info->create = true; ret = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (ret) @@ -1668,7 +1694,7 @@ irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) u32 pg_idx; for (pg_idx = 0; pg_idx < npages; pg_idx++) { - if ((*arr + (pg_size * pg_idx)) != arr[pg_idx]) + if ((*arr + ((u64)pg_size * pg_idx)) != arr[pg_idx]) return false; } @@ -1835,6 +1861,44 @@ irdma_handle_q_mem(struct irdma_device *iwdev, } /** + * irdma_hw_alloc_mw - create the hw memory window + * @iwdev: irdma device + * @iwmr: pointer to memory window info + */ +int +irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) +{ + struct irdma_mw_alloc_info *info; + struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); + struct irdma_cqp_request *cqp_request; + struct cqp_cmds_info *cqp_info; + int status; + + cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); + if (!cqp_request) + return -ENOMEM; + + cqp_info = &cqp_request->info; + info = &cqp_info->in.u.mw_alloc.info; + if (iwmr->ibmw.type == IB_MW_TYPE_1) + info->mw_wide = true; + + info->page_size = PAGE_SIZE; + info->mw_stag_index = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; + info->pd_id = iwpd->sc_pd.pd_id; + info->remote_access = true; + cqp_info->cqp_cmd = IRDMA_OP_MW_ALLOC; + cqp_info->post_sq = 1; + cqp_info->in.u.mw_alloc.dev = &iwdev->rf->sc_dev; + cqp_info->in.u.mw_alloc.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; + status = irdma_handle_cqp_op(iwdev->rf, cqp_request); + irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); + + return status; +} + +/** * irdma_hw_alloc_stag - cqp command to allocate stag * @iwdev: irdma device * @iwmr: irdma mr pointer @@ -1856,7 +1920,6 @@ irdma_hw_alloc_stag(struct irdma_device *iwdev, cqp_info = &cqp_request->info; info = &cqp_info->in.u.alloc_stag.info; - memset(info, 0, sizeof(*info)); info->page_size = PAGE_SIZE; info->stag_idx = iwmr->stag >> 
IRDMA_CQPSQ_STAG_IDX_S; info->pd_id = iwpd->sc_pd.pd_id; @@ -1867,6 +1930,7 @@ irdma_hw_alloc_stag(struct irdma_device *iwdev, cqp_info->post_sq = 1; cqp_info->in.u.alloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!status) @@ -1948,12 +2012,14 @@ irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, cqp_info = &cqp_request->info; stag_info = &cqp_info->in.u.mr_reg_non_shared.info; - memset(stag_info, 0, sizeof(*stag_info)); stag_info->va = iwpbl->user_base; stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; stag_info->stag_key = (u8)iwmr->stag; stag_info->total_len = iwmr->len; - stag_info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ? true : false; + if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) || iwmr->dma_mr) + stag_info->all_memory = true; + else + stag_info->all_memory = false; stag_info->access_rights = irdma_get_mr_access(access, iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev); stag_info->pd_id = iwpd->sc_pd.pd_id; @@ -1979,6 +2045,7 @@ irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, cqp_info->post_sq = 1; cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request; + cqp_info->create = true; ret = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); @@ -1992,7 +2059,7 @@ irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, * irdma_alloc_iwmr - Allocate iwmr @region - memory region @pd - protection domain @virt - virtual address @reg_type - * registration type */ -static struct irdma_mr * +struct irdma_mr * irdma_alloc_iwmr(struct ib_umem *region, struct ib_pd *pd, u64 virt, enum irdma_memreg_type reg_type) @@ -2022,19 +2089,17 @@ irdma_alloc_iwmr(struct ib_umem *region, return iwmr; } -static void +void 
irdma_free_iwmr(struct irdma_mr *iwmr) { kfree(iwmr); } /* - * irdma_reg_user_mr_type_mem - Handle memory registration - * @iwmr - irdma mr - * @access - access rights - * @create_stag - flag to create stag or not + * irdma_reg_user_mr_type_mem - Handle memory registration @iwmr - irdma mr @access - access rights @create_stag - flag + * to create stag or not */ -static int +int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access, bool create_stag) { @@ -2091,7 +2156,7 @@ free_pble: /* * irdma_reg_user_mr_type_qp - Handle QP memory registration @req - memory reg req @udata - user info @iwmr - irdma mr */ -static int +int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, struct ib_udata *udata, struct irdma_mr *iwmr) @@ -2104,6 +2169,11 @@ irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, int err; u8 lvl; + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) + return -EINVAL; + total = req.sq_pages + req.rq_pages + IRDMA_SHADOW_PGCNT; if (total > iwmr->page_cnt) return -EINVAL; @@ -2126,7 +2196,7 @@ irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, /* * irdma_reg_user_mr_type_cq - Handle CQ memory registration @req - memory reg req @udata - user info @iwmr - irdma mr */ -static int +int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req, struct ib_udata *udata, struct irdma_mr *iwmr) @@ -2158,85 +2228,6 @@ irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req, return 0; } -/** - * irdma_reg_user_mr - Register a user memory region - * @pd: ptr of pd - * @start: virtual start address - * @len: length of mr - * @virt: virtual address - * @access: access of mr - * @udata: user data - */ -static struct ib_mr * -irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, - u64 virt, int access, - struct ib_udata *udata) -{ -#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages) - struct irdma_device *iwdev = to_iwdev(pd->device); - 
struct irdma_mem_reg_req req = {}; - struct ib_umem *region; - struct irdma_mr *iwmr; - int err; - - if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) - return ERR_PTR(-EINVAL); - - if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) - return ERR_PTR(-EINVAL); - - region = ib_umem_get(pd->uobject->context, start, len, access, 0); - - if (IS_ERR(region)) { - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "Failed to create ib_umem region\n"); - return (struct ib_mr *)region; - } - - if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) { - ib_umem_release(region); - return ERR_PTR(-EFAULT); - } - - iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type); - if (IS_ERR(iwmr)) { - ib_umem_release(region); - return (struct ib_mr *)iwmr; - } - - switch (req.reg_type) { - case IRDMA_MEMREG_TYPE_QP: - err = irdma_reg_user_mr_type_qp(req, udata, iwmr); - if (err) - goto error; - - break; - case IRDMA_MEMREG_TYPE_CQ: - err = irdma_reg_user_mr_type_cq(req, udata, iwmr); - if (err) - goto error; - - break; - case IRDMA_MEMREG_TYPE_MEM: - err = irdma_reg_user_mr_type_mem(iwmr, access, true); - if (err) - goto error; - - break; - default: - err = -EINVAL; - goto error; - } - - return &iwmr->ibmr; - -error: - ib_umem_release(region); - irdma_free_iwmr(iwmr); - - return ERR_PTR(err); -} - int irdma_hwdereg_mr(struct ib_mr *ib_mr) { @@ -2262,10 +2253,11 @@ irdma_hwdereg_mr(struct ib_mr *ib_mr) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; - memset(info, 0, sizeof(*info)); info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = RS_64_1(ib_mr->rkey, IRDMA_CQPSQ_STAG_IDX_S); info->mr = true; + if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) + info->skip_flush_markers = true; if (iwpbl->pbl_allocated) info->dealloc_pbl = true; @@ -2303,7 +2295,8 @@ irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, if (IS_ERR(region)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "Failed to create ib_umem region\n"); + "Failed to create ib_umem region 
err=%ld\n", + PTR_ERR(region)); return (struct ib_mr *)region; } @@ -2335,10 +2328,11 @@ err: * @size: size of memory to register * @access: Access rights * @iova_start: start of virtual address for physical buffers + * @dma_mr: Flag indicating DMA Mem region */ struct ib_mr * irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access, - u64 *iova_start) + u64 *iova_start, bool dma_mr) { struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pbl *iwpbl; @@ -2355,6 +2349,7 @@ irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access, iwpbl = &iwmr->iwpbl; iwpbl->iwmr = iwmr; iwmr->type = IRDMA_MEMREG_TYPE_MEM; + iwmr->dma_mr = dma_mr; iwpbl->user_base = *iova_start; stag = irdma_create_stag(iwdev); if (!stag) { @@ -2394,7 +2389,7 @@ irdma_get_dma_mr(struct ib_pd *pd, int acc) { u64 kva = 0; - return irdma_reg_phys_mr(pd, 0, 0, acc, &kva); + return irdma_reg_phys_mr(pd, 0, 0, acc, &kva, true); } /** @@ -2547,7 +2542,7 @@ irdma_post_send(struct ib_qp *ibqp, break; case IB_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; - info.local_fence = info.read_fence; + info.local_fence = true; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; err = irdma_uk_stag_local_invalidate(ukqp, &info, true); break; @@ -2593,7 +2588,7 @@ irdma_post_send(struct ib_qp *ibqp, ib_wr = ib_wr->next; } - if (!iwqp->flush_issued) { + if (!atomic_read(&iwqp->flush_issued)) { if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) irdma_uk_qp_post_wr(ukqp); spin_unlock_irqrestore(&iwqp->lock, flags); @@ -2641,13 +2636,13 @@ irdma_post_recv(struct ib_qp *ibqp, "post_recv err %d\n", err); goto out; } - ib_wr = ib_wr->next; } out: spin_unlock_irqrestore(&iwqp->lock, flags); - if (iwqp->flush_issued) + + if (atomic_read(&iwqp->flush_issued)) irdma_sched_qp_flush_work(iwqp); if (err) @@ -2934,7 +2929,7 @@ irdma_req_notify_cq(struct ib_cq *ibcq, } if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && - (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) + 
(!irdma_uk_cq_empty(&iwcq->sc_cq.cq_uk) || !list_empty(&iwcq->cmpl_generated))) ret = 1; spin_unlock_irqrestore(&iwcq->lock, flags); @@ -3012,6 +3007,10 @@ irdma_mcast_cqp_op(struct irdma_device *iwdev, cqp_info->post_sq = 1; cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request; cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp; + + if (op == IRDMA_OP_MC_CREATE) + cqp_info->create = true; + status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); diff --git a/sys/dev/irdma/irdma_verbs.h b/sys/dev/irdma/irdma_verbs.h index 9a5b1cdb0381..d3f240783c3d 100644 --- a/sys/dev/irdma/irdma_verbs.h +++ b/sys/dev/irdma/irdma_verbs.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2023 Intel Corporation + * Copyright (c) 2015 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,6 +37,7 @@ #define IRDMA_MAX_SAVED_PHY_PGADDR 4 #define IRDMA_FLUSH_DELAY_MS 20 +#define IRDMA_PERIODIC_FLUSH_MS 2000 #define IRDMA_PKEY_TBL_SZ 1 #define IRDMA_DEFAULT_PKEY 0xFFFF @@ -141,6 +142,7 @@ struct irdma_mr { int access; u8 is_hwreg; u16 type; + bool dma_mr:1; u32 page_cnt; u64 page_size; u64 page_msk; @@ -154,21 +156,15 @@ struct irdma_mr { struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; - u16 cq_head; - u16 cq_size; - u16 cq_num; + u32 cq_num; bool user_mode; atomic_t armed; enum irdma_cmpl_notify last_notify; - u32 polled_cmpls; - u32 cq_mem_size; struct irdma_dma_mem kmem; struct irdma_dma_mem kmem_shadow; struct completion free_cq; atomic_t refcnt; spinlock_t lock; /* for poll cq */ - struct irdma_pbl *iwpbl; - struct irdma_pbl *iwpbl_shadow; struct list_head resize_list; struct irdma_cq_poll_info cur_cqe; struct list_head cmpl_generated; @@ -244,10 +240,12 @@ struct irdma_qp { dma_addr_t pbl_pbase; struct page *page; u8 iwarp_state; + atomic_t 
flush_issued; u16 term_sq_flush_code; u16 term_rq_flush_code; u8 hw_iwarp_state; u8 hw_tcp_state; + u8 ae_src; struct irdma_qp_kmode kqp; struct irdma_dma_mem host_ctx; struct timer_list terminate_timer; @@ -262,7 +260,6 @@ struct irdma_qp { bool active_conn:1; bool user_mode:1; bool hte_added:1; - bool flush_issued:1; bool sig_all:1; bool pau_mode:1; bool suspend_pending:1; @@ -385,6 +382,12 @@ static inline void irdma_mcast_mac_v6(u32 *ip_addr, u8 *mac) struct rdma_user_mmap_entry* irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset); +struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region, + struct ib_pd *pd, u64 virt, + enum irdma_memreg_type reg_type); +void irdma_free_iwmr(struct irdma_mr *iwmr); +int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access, + bool create_stag); int irdma_ib_register_device(struct irdma_device *iwdev); void irdma_ib_unregister_device(struct irdma_device *iwdev); void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event); @@ -392,5 +395,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp); void irdma_remove_cmpls_list(struct irdma_cq *iwcq); int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info); void irdma_sched_qp_flush_work(struct irdma_qp *iwqp); -void irdma_flush_worker(struct work_struct *work); +void irdma_kern_flush_worker(struct work_struct *work); +void irdma_user_flush_worker(struct work_struct *work); +int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr); #endif /* IRDMA_VERBS_H */ diff --git a/sys/dev/irdma/irdma_ws.c b/sys/dev/irdma/irdma_ws.c index d311343111b9..af781f23a22c 100644 --- a/sys/dev/irdma/irdma_ws.c +++ b/sys/dev/irdma/irdma_ws.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2023 Intel Corporation + * Copyright (c) 2017 - 2026 Intel Corporation * * This software is available to you 
under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -118,10 +118,11 @@ irdma_free_node(struct irdma_sc_vsi *vsi, * @vsi: vsi pointer * @node: pointer to node * @cmd: add, remove or modify + * @qs_handle: Pointer to store the qs_handle for a leaf node */ static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *node, u8 cmd) + struct irdma_ws_node *node, u8 cmd, u16 *qs_handle) { struct irdma_ws_node_info node_info = {0}; @@ -142,10 +143,8 @@ irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, return -ENOMEM; } - if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) { - node->qs_handle = node_info.qs_handle; - vsi->qos[node->user_pri].qs_handle = node_info.qs_handle; - } + if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE && qs_handle) + *qs_handle = node_info.qs_handle; return 0; } @@ -193,11 +192,8 @@ irdma_ws_in_use(struct irdma_sc_vsi *vsi, u8 user_pri) { int i; - mutex_lock(&vsi->qos[user_pri].qos_mutex); - if (!list_empty(&vsi->qos[user_pri].qplist)) { - mutex_unlock(&vsi->qos[user_pri].qos_mutex); + if (!list_empty(&vsi->qos[user_pri].qplist)) return true; - } /* * Check if the qs handle associated with the given user priority is in use by any other user priority. 
If so, @@ -205,12 +201,9 @@ irdma_ws_in_use(struct irdma_sc_vsi *vsi, u8 user_pri) */ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { if (vsi->qos[i].qs_handle == vsi->qos[user_pri].qs_handle && - !list_empty(&vsi->qos[i].qplist)) { - mutex_unlock(&vsi->qos[user_pri].qos_mutex); + !list_empty(&vsi->qos[i].qplist)) return true; - } } - mutex_unlock(&vsi->qos[user_pri].qos_mutex); return false; } @@ -228,9 +221,10 @@ irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri) int i; qs_handle = vsi->qos[user_pri].qs_handle; - for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { if (vsi->qos[i].qs_handle == qs_handle) vsi->qos[i].valid = false; + } ws_tree_root = vsi->dev->ws_tree_root; if (!ws_tree_root) @@ -247,25 +241,93 @@ irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri) if (!tc_node) return; - irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); - vsi->unregister_qset(vsi, tc_node); list_del(&tc_node->siblings); + irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE, NULL); + + vsi->unregister_qset(vsi, tc_node); irdma_free_node(vsi, tc_node); /* Check if VSI node can be freed */ if (list_empty(&vsi_node->child_list_head)) { - irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE); + irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE, NULL); list_del(&vsi_node->siblings); irdma_free_node(vsi, vsi_node); /* Free head node there are no remaining VSI nodes */ if (list_empty(&ws_tree_root->child_list_head)) { irdma_ws_cqp_cmd(vsi, ws_tree_root, - IRDMA_OP_WS_DELETE_NODE); + IRDMA_OP_WS_DELETE_NODE, NULL); irdma_free_node(vsi, ws_tree_root); vsi->dev->ws_tree_root = NULL; } } } +static int +irdma_enable_leaf(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node) +{ + int ret; + + ret = vsi->register_qset(vsi, tc_node); + if (ret) + return ret; + + tc_node->enable = true; + ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE, NULL); + if (ret) + goto enable_err; + return 0; + +enable_err: + 
vsi->unregister_qset(vsi, tc_node); + + return ret; +} + +static struct irdma_ws_node * +irdma_add_leaf_node(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *vsi_node, + u8 user_pri, u16 traffic_class) +{ + struct irdma_ws_node *tc_node = + irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF, vsi_node); + int i, ret = 0; + + if (!tc_node) + return NULL; + ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_ADD_NODE, &tc_node->qs_handle); + if (ret) { + irdma_free_node(vsi, tc_node); + return NULL; + } + vsi->qos[tc_node->user_pri].qs_handle = tc_node->qs_handle; + + list_add(&tc_node->siblings, &vsi_node->child_list_head); + + ret = irdma_enable_leaf(vsi, tc_node); + if (ret) + goto reg_err; + + /* + * Iterate through other UPs and update the QS handle if they have a matching traffic class. + */ + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + if (vsi->qos[i].traffic_class == traffic_class) { + vsi->qos[i].qs_handle = tc_node->qs_handle; + vsi->qos[i].l2_sched_node_id = + tc_node->l2_sched_node_id; + vsi->qos[i].valid = true; + } + } + return tc_node; + +reg_err: + irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE, NULL); + list_del(&tc_node->siblings); + irdma_free_node(vsi, tc_node); + + return NULL; +} + /** * irdma_ws_add - Build work scheduler tree, set RDMA qs_handle * @vsi: vsi pointer @@ -279,7 +341,6 @@ irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) struct irdma_ws_node *tc_node; u16 traffic_class; int ret = 0; - int i; mutex_lock(&vsi->dev->ws_mutex); if (vsi->tc_change_pending) { @@ -298,9 +359,11 @@ irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) ret = -ENOMEM; goto exit; } - irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Creating root node = %d\n", ws_tree_root->index); + irdma_debug(vsi->dev, IRDMA_DEBUG_WS, + "Creating root node = %d\n", ws_tree_root->index); - ret = irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_ADD_NODE); + ret = irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_ADD_NODE, + NULL); if (ret) { irdma_free_node(vsi, ws_tree_root); 
goto exit; @@ -324,7 +387,8 @@ irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) goto vsi_add_err; } - ret = irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_ADD_NODE); + ret = irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_ADD_NODE, + NULL); if (ret) { irdma_free_node(vsi, vsi_node); goto vsi_add_err; @@ -344,56 +408,22 @@ irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Node not found matching VSI %d and TC %d\n", vsi->vsi_idx, traffic_class); - tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF, - vsi_node); + tc_node = irdma_add_leaf_node(vsi, vsi_node, user_pri, + traffic_class); if (!tc_node) { ret = -ENOMEM; goto leaf_add_err; } - - ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_ADD_NODE); - if (ret) { - irdma_free_node(vsi, tc_node); - goto leaf_add_err; - } - - list_add(&tc_node->siblings, &vsi_node->child_list_head); - /* - * callback to LAN to update the LAN tree with our node - */ - ret = vsi->register_qset(vsi, tc_node); - if (ret) - goto reg_err; - - tc_node->enable = true; - ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE); - if (ret) { - vsi->unregister_qset(vsi, tc_node); - goto reg_err; - } } irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Using node %d which represents VSI %d TC %d\n", tc_node->index, vsi->vsi_idx, traffic_class); - /* - * Iterate through other UPs and update the QS handle if they have a matching traffic class. 
- */ - for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { - if (vsi->qos[i].traffic_class == traffic_class) { - vsi->qos[i].qs_handle = tc_node->qs_handle; - vsi->qos[i].l2_sched_node_id = tc_node->l2_sched_node_id; - vsi->qos[i].valid = true; - } - } goto exit; -reg_err: - irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); - list_del(&tc_node->siblings); - irdma_free_node(vsi, tc_node); leaf_add_err: if (list_empty(&vsi_node->child_list_head)) { - if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE)) + if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE, + NULL)) goto exit; list_del(&vsi_node->siblings); irdma_free_node(vsi, vsi_node); @@ -402,7 +432,8 @@ leaf_add_err: vsi_add_err: /* Free head node there are no remaining VSI nodes */ if (list_empty(&ws_tree_root->child_list_head)) { - irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_DELETE_NODE); + irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_DELETE_NODE, + NULL); vsi->dev->ws_tree_root = NULL; irdma_free_node(vsi, ws_tree_root); } @@ -420,12 +451,14 @@ exit: void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri) { + mutex_lock(&vsi->qos[user_pri].qos_mutex); mutex_lock(&vsi->dev->ws_mutex); if (irdma_ws_in_use(vsi, user_pri)) goto exit; irdma_remove_leaf(vsi, user_pri); exit: mutex_unlock(&vsi->dev->ws_mutex); + mutex_unlock(&vsi->qos[user_pri].qos_mutex); } /** diff --git a/sys/dev/irdma/osdep.h b/sys/dev/irdma/osdep.h index 831bd50f3ae4..eb73ffbd30e2 100644 --- a/sys/dev/irdma/osdep.h +++ b/sys/dev/irdma/osdep.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2023 Intel Corporation + * Copyright (c) 2021 - 2026 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -85,13 +85,18 @@ #define STATS_TIMER_DELAY 60000 /* a couple of linux size defines */ -#define SZ_128 128 +#define SZ_128 128 #define SPEED_1000 1000 +#define SPEED_2500 2500 +#define SPEED_5000 5000 #define SPEED_10000 10000 +#define SPEED_14000 14000 #define SPEED_20000 20000 #define SPEED_25000 25000 #define SPEED_40000 40000 +#define SPEED_50000 50000 #define SPEED_100000 100000 +#define SPEED_200000 200000 #define irdma_mb() mb() #define irdma_wmb() wmb() diff --git a/sys/modules/irdma/Makefile b/sys/modules/irdma/Makefile index a9ef6e63d3f2..8377e2da57a0 100644 --- a/sys/modules/irdma/Makefile +++ b/sys/modules/irdma/Makefile @@ -8,7 +8,7 @@ KMOD= irdma SRCS= icrdma.c SRCS+= fbsd_kcompat.c irdma_hw.c irdma_verbs.c irdma_utils.c SRCS+= irdma_cm.c irdma_kcompat.c -SRCS+= irdma_if.h irdma_di_if.h ice_rdma.h vnode_if.h device_if.h bus_if.h opt_inet.h pci_if.h opt_inet6.h +SRCS+= irdma_if.h irdma_di_if.h ice_rdma.h vnode_if.h device_if.h bus_if.h opt_inet.h pci_if.h opt_inet6.h opt_wlan.h # Shared source SRCS+= irdma_ctrl.c irdma_hmc.c icrdma_hw.c irdma_pble.c irdma_puda.c irdma_uda.c irdma_uk.c irdma_ws.c |
