aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Zec <zec@FreeBSD.org>2020-08-30 07:34:32 +0000
committerMarko Zec <zec@FreeBSD.org>2020-08-30 07:34:32 +0000
commitbd368728679f0b7746c81aec5b863528a67a2d97 (patch)
tree843404adcc85af8ea8c6e9cb834259d9d58069d8
parent0820428967bca4dce5283ad65a42fccf3bae9529 (diff)
downloadsrc-bd368728679f0b7746c81aec5b863528a67a2d97.tar.gz
src-bd368728679f0b7746c81aec5b863528a67a2d97.zip
Driver for 4x10Gb Ethernet reference NIC FPGA design for NetFPGA SUME
development board. Submitted by: Denis Salopek <denis.salopek AT fer.hr> Reported by: zec, bz (src); rgrimes, bcr (manpages) MFC after: 7 days Sponsored by: Google Summer of Code 2020 Differential Revision: https://reviews.freebsd.org/D26074
Notes
Notes: svn path=/head/; revision=364973
-rw-r--r--share/man/man4/Makefile3
-rw-r--r--share/man/man4/sume.498
-rw-r--r--sys/conf/files.amd641
-rw-r--r--sys/dev/sume/adapter.h242
-rw-r--r--sys/dev/sume/if_sume.c1602
-rw-r--r--sys/modules/Makefile2
-rw-r--r--sys/modules/sume/Makefile9
7 files changed, 1957 insertions, 0 deletions
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index c23ccabd44d0..34cb8e10a060 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -514,6 +514,7 @@ MAN= aac.4 \
ste.4 \
stf.4 \
stge.4 \
+ ${_sume.4} \
${_superio.4} \
sym.4 \
syncache.4 \
@@ -851,6 +852,7 @@ _qlxgbe.4= qlxgbe.4
_qlnxe.4= qlnxe.4
_sfxge.4= sfxge.4
_smartpqi.4= smartpqi.4
+_sume.4= sume.4
_vmd.4= vmd.4
MLINKS+=qlxge.4 if_qlxge.4
@@ -858,6 +860,7 @@ MLINKS+=qlxgb.4 if_qlxgb.4
MLINKS+=qlxgbe.4 if_qlxgbe.4
MLINKS+=qlnxe.4 if_qlnxe.4
MLINKS+=sfxge.4 if_sfxge.4
+MLINKS+=sume.4 if_sume.4
.if ${MK_BHYVE} != "no"
_bhyve.4= bhyve.4
diff --git a/share/man/man4/sume.4 b/share/man/man4/sume.4
new file mode 100644
index 000000000000..66c221cc2d08
--- /dev/null
+++ b/share/man/man4/sume.4
@@ -0,0 +1,98 @@
+.\"-
+.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+.\"
+.\" Copyright (c) 2020 Denis Salopek
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+.\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+.\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd August 30, 2020
+.Dt SUME 4
+.Os
+.Sh NAME
+.Nm sume
+.Nd "NetFPGA SUME 4x10Gb Ethernet driver"
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following lines
+in your kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device sume"
+.Ed
+.Pp
+Alternatively, to load the driver as a module at boot time, place
+the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+if_sume_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for NetFPGA SUME Virtex-7 FPGA Development Board
+with the reference NIC bitstream loaded onto it.
+The HDL design for the reference NIC project uses the RIFFA based DMA
+engine to communicate with the host machine over PCIe.
+Every packet is transmitted to or from the board via a single DMA
+transaction, and each transaction requires up to three interrupts,
+which yields low performance.
+.Pp
+There is no support for Jumbo frames as the hardware is capable of
+dealing only with frames with maximum size of 1514 bytes.
+The hardware does not support multicast filtering, provides no checksums,
+and offers no other offloading.
+.Sh SEE ALSO
+.Xr arp 4 ,
+.Xr netgraph 4 ,
+.Xr netintro 4 ,
+.Xr ng_ether 4 ,
+.Xr vlan 4 ,
+.Xr ifconfig 8
+.Sh AUTHORS
+The Linux
+.Nm
+driver was originally written by
+.An -nosplit
+.An Bjoern A. Zeeb .
+The
+.Fx version and this manual page were written by
+.An Denis Salopek
+as a GSoC project.
+More information about the project can be found here:
+.Pa https://wiki.freebsd.org/SummerOfCode2020Projects/NetFPGA_SUME_Driver
+.Sh BUGS
+The reference NIC hardware design provides no mechanism for quiescing
+inbound traffic from interfaces configured as DOWN.
+All packets from administratively disabled interfaces are transferred to
+main memory, leaving the driver with the task of dropping such packets,
+thus consuming PCI bandwidth, interrupts and CPU cycles in vain.
+.Pp
+Pre-built FPGA bitstream from the NetFPGA project may not work correctly.
+At higher RX packet rates, the newly incoming packets can overwrite the
+ones in an internal FIFO so the packets would arrive in main memory
+corrupted, until a physical reset of the board.
+.Pp
+Occasionally, the driver can get stuck in a non-IDLE TX state due to
+a missed interrupt.
+The driver includes a watchdog function which monitors for such a
+condition and resets the board automatically.
+For more details, visit the NetFPGA SUME project site.
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index f9e6d0925820..f1a287d6d30c 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -355,6 +355,7 @@ dev/smartpqi/smartpqi_response.c optional smartpqi
dev/smartpqi/smartpqi_sis.c optional smartpqi
dev/smartpqi/smartpqi_tag.c optional smartpqi
dev/speaker/spkr.c optional speaker
+dev/sume/if_sume.c optional sume
dev/superio/superio.c optional superio isa
dev/syscons/apm/apm_saver.c optional apm_saver apm
dev/syscons/scvesactl.c optional sc vga vesa
diff --git a/sys/dev/sume/adapter.h b/sys/dev/sume/adapter.h
new file mode 100644
index 000000000000..4f34ee05d8e1
--- /dev/null
+++ b/sys/dev/sume/adapter.h
@@ -0,0 +1,242 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2015 Bjoern A. Zeeb
+ * Copyright (c) 2020 Denis Salopek
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
+ * ("MRC2"), as part of the DARPA MRC research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#define DEFAULT_ETHER_ADDRESS "\02SUME\00"
+#define SUME_ETH_DEVICE_NAME "sume"
+#define MAX_IFC_NAME_LEN 8
+
+#define SUME_NPORTS 4
+
+#define SUME_IOCTL_CMD_WRITE_REG (SIOCGPRIVATE_0)
+#define SUME_IOCTL_CMD_READ_REG (SIOCGPRIVATE_1)
+
+/*
+ * Adapter lock helpers.  The argument is parenthesized so that any pointer
+ * expression can be passed safely.  The trailing semicolon is retained
+ * because call sites outside this header may rely on it.
+ */
+#define SUME_LOCK(adapter) mtx_lock(&(adapter)->lock);
+#define SUME_UNLOCK(adapter) mtx_unlock(&(adapter)->lock);
+
+/* Currently SUME only uses 2 fixed channels for all port traffic and regs. */
+#define SUME_RIFFA_CHANNEL_DATA 0
+#define SUME_RIFFA_CHANNEL_REG 1
+#define SUME_RIFFA_CHANNELS 2
+
+/* RIFFA constants. */
+#define RIFFA_MAX_CHNLS 12
+#define RIFFA_MAX_BUS_WIDTH_PARAM 4
+#define RIFFA_SG_BUF_SIZE (4*1024)
+#define RIFFA_SG_ELEMS 200
+
+/* RIFFA register offsets. */
+#define RIFFA_RX_SG_LEN_REG_OFF 0x0
+#define RIFFA_RX_SG_ADDR_LO_REG_OFF 0x1
+#define RIFFA_RX_SG_ADDR_HI_REG_OFF 0x2
+#define RIFFA_RX_LEN_REG_OFF 0x3
+#define RIFFA_RX_OFFLAST_REG_OFF 0x4
+#define RIFFA_TX_SG_LEN_REG_OFF 0x5
+#define RIFFA_TX_SG_ADDR_LO_REG_OFF 0x6
+#define RIFFA_TX_SG_ADDR_HI_REG_OFF 0x7
+#define RIFFA_TX_LEN_REG_OFF 0x8
+#define RIFFA_TX_OFFLAST_REG_OFF 0x9
+#define RIFFA_INFO_REG_OFF 0xA
+#define RIFFA_IRQ_REG0_OFF 0xB
+#define RIFFA_IRQ_REG1_OFF 0xC
+#define RIFFA_RX_TNFR_LEN_REG_OFF 0xD
+#define RIFFA_TX_TNFR_LEN_REG_OFF 0xE
+
+/* Register index of register 'o' within channel 'c' (16 slots per channel). */
+#define RIFFA_CHNL_REG(c, o) (((c) << 4) + (o))
+
+/*
+ * RIFFA state machine;
+ * rather than using complex circular buffers for 1 transaction.
+ */
+#define SUME_RIFFA_CHAN_STATE_IDLE 0x01
+#define SUME_RIFFA_CHAN_STATE_READY 0x02
+#define SUME_RIFFA_CHAN_STATE_READ 0x04
+#define SUME_RIFFA_CHAN_STATE_LEN 0x08
+
+/* Accessor macros. */
+#define SUME_OFFLAST ((0 << 1) | (1 & 0x01))
+#define SUME_RIFFA_LAST(offlast) ((offlast) & 0x01)
+#define SUME_RIFFA_OFFSET(offlast) ((uint64_t)((offlast) >> 1) << 2)
+#define SUME_RIFFA_LEN(len) ((uint64_t)(len) << 2)
+
+/*
+ * Split a bus address into its lower and upper 32 bits.  The argument is
+ * widened to 64 bits first so the 32-bit shift is well defined even when the
+ * address type is only 32 bits wide.
+ */
+#define SUME_RIFFA_LO_ADDR(addr) ((uint64_t)(addr) & 0xFFFFFFFF)
+#define SUME_RIFFA_HI_ADDR(addr) (((uint64_t)(addr) >> 32) & 0xFFFFFFFF)
+
+/* Vector bits. */
+#define SUME_MSI_RXQUE (1 << 0)
+#define SUME_MSI_RXBUF (1 << 1)
+#define SUME_MSI_RXDONE (1 << 2)
+#define SUME_MSI_TXBUF (1 << 3)
+#define SUME_MSI_TXDONE (1 << 4)
+
+/* Invalid vector. */
+#define SUME_INVALID_VECT 0xc0000000
+
+/* Module register data (packet counters, link status...) */
+#define SUME_MOD0_REG_BASE 0x44040000
+/* Base address of the module registers of 'port'; ports are 0x10000 apart. */
+#define SUME_MOD_REG(port) (SUME_MOD0_REG_BASE + 0x10000 * (port))
+
+#define SUME_RESET_OFFSET 0x8
+#define SUME_PKTIN_OFFSET 0x18
+#define SUME_PKTOUT_OFFSET 0x1c
+#define SUME_STATUS_OFFSET 0x48
+
+#define SUME_RESET_ADDR(p) (SUME_MOD_REG(p) + SUME_RESET_OFFSET)
+#define SUME_STAT_RX_ADDR(p) (SUME_MOD_REG(p) + SUME_PKTIN_OFFSET)
+#define SUME_STAT_TX_ADDR(p) (SUME_MOD_REG(p) + SUME_PKTOUT_OFFSET)
+#define SUME_STATUS_ADDR(p) (SUME_MOD_REG(p) + SUME_STATUS_OFFSET)
+
+/* Extract the link-status bit (bit 12) from a status register value. */
+#define SUME_LINK_STATUS(val) (((val) >> 12) & 0x1)
+
+/* Various bits and pieces. */
+#define SUME_RIFFA_MAGIC 0xcafe
+#define SUME_MR_WRITE 0x1f
+#define SUME_MR_READ 0x00
+#define SUME_INIT_RTAG -3
+#define SUME_DPORT_MASK 0xaa
+#define SUME_MIN_PKT_SIZE (ETHER_MIN_LEN - ETHER_CRC_LEN)
+
+struct irq {
+ uint32_t rid; /* resource id handed to bus_alloc_resource_any() */
+ struct resource *res; /* the allocated SYS_RES_IRQ resource */
+ void *tag; /* NOTE(review): presumably the bus_setup_intr() cookie - confirm */
+} __aligned(CACHE_LINE_SIZE);
+
+struct nf_stats { /* per-interface counters, kept in struct nf_priv */
+ uint64_t hw_rx_packets;
+ uint64_t hw_tx_packets;
+ uint64_t ifc_down_bytes; /* payload bytes received while the interface was not IFF_UP */
+ uint64_t ifc_down_packets; /* packets received while the interface was not IFF_UP */
+ uint64_t rx_bytes; /* payload bytes (metadata plen) of accepted RX packets */
+ uint64_t rx_dropped;
+ uint64_t rx_packets; /* RX packets that passed metadata validation */
+ uint64_t tx_bytes;
+ uint64_t tx_dropped;
+ uint64_t tx_packets;
+};
+
+struct riffa_chnl_dir { /* state of one direction (send or recv) of one RIFFA channel */
+ uint32_t state; /* one of SUME_RIFFA_CHAN_STATE_* */
+ bus_dma_tag_t ch_tag; /* DMA tag used when syncing the bouncebuffer */
+ bus_dmamap_t ch_map; /* DMA map used when syncing the bouncebuffer */
+ char *buf_addr; /* bouncebuf addresses+len. */
+ bus_addr_t buf_hw_addr; /* -- " -- mapped. */
+ uint32_t num_sg;
+ uint32_t event; /* Used for modreg r/w */
+ uint32_t len; /* words */
+ uint32_t offlast; /* raw offset/last register value, see SUME_RIFFA_OFFSET/LAST */
+ uint32_t recovery; /* set to 1 when an unexpected interrupt bit is seen */
+ uint32_t rtag;
+};
+
+struct sume_ifreq {
+ uint32_t addr;
+ uint32_t val;
+};
+
+struct nf_priv {
+ struct sume_adapter *adapter;
+ struct ifmedia media;
+ struct nf_stats stats;
+ uint32_t unit;
+ uint32_t port;
+ uint32_t link_up;
+};
+
+struct sume_adapter {
+ struct mtx lock; /* taken through SUME_LOCK()/SUME_UNLOCK() */
+ uint32_t running; /* interrupts are reported as stray while this is 0 */
+ uint32_t rid; /* resource id of BAR0 */
+ struct riffa_chnl_dir **recv; /* RX direction state, indexed by channel */
+ struct riffa_chnl_dir **send; /* TX direction state, indexed by channel */
+ device_t dev;
+ struct ifnet *ifp[SUME_NPORTS]; /* one network interface per port */
+ struct resource *bar0_addr; /* BAR0 memory resource */
+ bus_space_tag_t bt; /* bus tag of BAR0, used for register access */
+ bus_space_handle_t bh; /* bus handle of BAR0, used for register access */
+ bus_size_t bar0_len; /* size of BAR0 in bytes */
+ struct irq irq;
+ struct callout timer;
+ struct task stat_task;
+ struct taskqueue *tq;
+ uint64_t bytes_err; /* bytes of received data dropped as erroneous */
+ uint64_t packets_err; /* received packets dropped as erroneous */
+ uint32_t last_ifc;
+ uint32_t num_sg;
+ uint32_t sg_buf_size; /* RX offset + length is checked against this */
+ uint32_t sume_debug; /* non-zero enables debug printfs */
+ uint32_t wd_counter;
+};
+
+/* SUME metadata:
+ * sport - not used for RX. For TX, set to 0x02, 0x08, 0x20, 0x80, depending on
+ * the sending interface (nf0, nf1, nf2 or nf3).
+ * dport - For RX, is set to 0x02, 0x08, 0x20, 0x80, depending on the receiving
+ * interface (nf0, nf1, nf2 or nf3). For TX, set to 0x01, 0x04, 0x10, 0x40,
+ * depending on the sending HW interface (nf0, nf1, nf2 or nf3).
+ * plen - length of the send/receive packet data (in bytes)
+ * magic - SUME hardcoded magic number which should be 0xcafe
+ * t1, t2 - could be used for timestamping by SUME
+ */
+struct nf_metadata {
+ uint16_t sport;
+ uint16_t dport;
+ uint16_t plen;
+ uint16_t magic;
+ uint32_t t1;
+ uint32_t t2;
+};
+
+/* Used for ioctl communication with the rwaxi program used to read/write SUME
+ * internally defined register data.
+ * addr - address of the SUME module register to read/write
+ * val - value to write/read to/from the register
+ * rtag - returned on read: transaction tag, for synchronization
+ * optype - 0x1f when writing, 0x00 for reading
+ */
+struct nf_regop_data {
+ uint32_t addr;
+ uint32_t val;
+ uint32_t rtag;
+ uint32_t optype;
+};
+
+/* Our bouncebuffer "descriptor". This holds our physical address (lower and
+ * upper values) of the beginning of the DMA data to RX/TX. The len is number
+ * of words to transmit.
+ */
+struct nf_bb_desc {
+ uint32_t lower;
+ uint32_t upper;
+ uint32_t len;
+};
diff --git a/sys/dev/sume/if_sume.c b/sys/dev/sume/if_sume.c
new file mode 100644
index 000000000000..ba9a5098000d
--- /dev/null
+++ b/sys/dev/sume/if_sume.c
@@ -0,0 +1,1602 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2015 Bjoern A. Zeeb
+ * Copyright (c) 2020 Denis Salopek
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
+ * ("MRC2"), as part of the DARPA MRC research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <machine/bus.h>
+
+#include "adapter.h"
+
+#define PCI_VENDOR_ID_XILINX 0x10ee
+#define PCI_DEVICE_ID_SUME 0x7028
+
+/* SUME bus driver interface */
+static int sume_probe(device_t);
+static int sume_attach(device_t);
+static int sume_detach(device_t);
+
+static device_method_t sume_methods[] = { /* bus driver entry points */
+ DEVMETHOD(device_probe, sume_probe), /* matches the supported PCI IDs */
+ DEVMETHOD(device_attach, sume_attach),
+ DEVMETHOD(device_detach, sume_detach),
+ DEVMETHOD_END
+};
+
+static driver_t sume_driver = {
+ "sume", /* driver name */
+ sume_methods, /* method table */
+ sizeof(struct sume_adapter) /* NOTE(review): presumably the per-device softc size - confirm */
+};
+
+/*
+ * The DMA engine for SUME generates interrupts for each RX/TX transaction.
+ * Depending on the channel (0 if packet transaction, 1 if register transaction)
+ * the used bits of the interrupt vector will be the lowest or the second lowest
+ * 5 bits.
+ *
+ * When receiving packets from SUME (RX):
+ * (1) SUME received a packet on one of the interfaces.
+ * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
+ * transaction).
+ * (3) We read the length of the incoming packet and the offset along with the
+ * 'last' flag from the SUME registers.
+ * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
+ * address buf_addr. For now, this is how it's done:
+ * - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
+ * address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
+ * and length of incoming data (buf_addr[2]).
+ * - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
+ * physical address buf_hw_addr is a block of contiguous memory mapped to
+ * buf_addr, so we can set the incoming data's physical address (buf_addr[0]
+ * and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
+ * (5) We notify SUME that the bouncebuffer is ready for the transaction by
+ * writing the lower/upper physical address buf_hw_addr to the SUME
+ * registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
+ * well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
+ * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
+ * bouncebuffer received).
+ * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
+ * transaction is done).
+ * (8) SUME can do both steps (6) and (7) using the same interrupt.
+ * (9) We read the first 16 bytes (metadata) of the received data and note the
+ * incoming interface so we can later forward it to the right one in the OS
+ * (sume0, sume1, sume2 or sume3).
+ * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
+ * and set the mbuf rcvif to the incoming interface.
+ * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
+ *
+ * When sending packets to SUME (TX):
+ * (1) The OS calls sume_if_start() function on TX.
+ * (2) We get the mbuf packet data and copy it to the
+ * buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
+ * (3) We create the metadata based on the output interface and copy it to the
+ * buf_addr+3*sizeof(uint32_t).
+ * (4) We write the offset/last and length of the packet to the SUME registers
+ * RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
+ * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
+ * with the physical address and length just as in RX step (4).
+ * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
+ * registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
+ * RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
+ * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
+ * bouncebuffer is read).
+ * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
+ * transaction is done).
+ * (9) SUME can do both steps (7) and (8) using the same interrupt.
+ *
+ * Internal registers
+ * Every module in the SUME hardware has its own set of internal registers
+ * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
+ * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
+ * offsets to different memory locations of every module are defined in their
+ * corresponding folder inside the library. These registers can be RO/RW and
+ * there is a special method to fetch/change this data over 1 or 2 DMA
+ * transactions. For writing, by calling the sume_module_reg_write(). For
+ * reading, by calling the sume_module_reg_write() and then
+ * sume_module_reg_read(). Check those functions for more information.
+ */
+
+MALLOC_DECLARE(M_SUME);
+MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
+
+static void check_tx_queues(struct sume_adapter *);
+static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
+ uint64_t);
+
+static struct unrhdr *unr;
+
+/*
+ * PCI device IDs (under the Xilinx vendor ID) handled by this driver, with
+ * the description reported for each.  The table is read-only and the
+ * descriptions are string literals, hence the const qualifiers.
+ */
+static const struct {
+	uint16_t device;
+	const char *desc;
+} sume_pciids[] = {
+	{PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
+};
+
+/* Read the 32-bit adapter register with index 'offset' (in words). */
+static inline uint32_t
+read_reg(struct sume_adapter *adapter, int offset)
+{
+	/* Registers are 4 bytes apart: turn the index into a byte offset. */
+	const int byte_off = offset << 2;
+
+	return (bus_space_read_4(adapter->bt, adapter->bh, byte_off));
+}
+
+/* Write 'val' to the 32-bit adapter register with index 'offset' (in words). */
+static inline void
+write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
+{
+	/* Registers are 4 bytes apart: turn the index into a byte offset. */
+	const int byte_off = offset << 2;
+
+	bus_space_write_4(adapter->bt, adapter->bh, byte_off, val);
+}
+
+/*
+ * Probe: accept the device when its vendor is Xilinx and its device ID is
+ * listed in sume_pciids[]; the matching description is set on success.
+ * Returns BUS_PROBE_DEFAULT on a match and ENXIO otherwise.
+ */
+static int
+sume_probe(device_t dev)
+{
+	uint16_t vendor, device;
+	int idx;
+
+	vendor = pci_get_vendor(dev);
+	device = pci_get_device(dev);
+	if (vendor != PCI_VENDOR_ID_XILINX)
+		return (ENXIO);
+
+	for (idx = 0; idx < nitems(sume_pciids); idx++) {
+		if (sume_pciids[idx].device != device)
+			continue;
+		device_set_desc(dev, sume_pciids[idx].desc);
+		return (BUS_PROBE_DEFAULT);
+	}
+
+	return (ENXIO);
+}
+
+/*
+ * Build an mbuf for a packet received from SUME.
+ *
+ * 'len' is the number of bytes (metadata included) that were written to the
+ * data channel bouncebuffer, starting right after the bouncebuffer
+ * descriptor (buf_addr + sizeof(struct nf_bb_desc)).  The metadata carries
+ * the receiving port in dport (one of the bits in SUME_DPORT_MASK, i.e.
+ * 0x02, 0x08, 0x20 or 0x80 for nf0..nf3), the payload length in plen and a
+ * magic word that must equal SUME_RIFFA_MAGIC (0xcafe).
+ *
+ * On success the payload is copied into a freshly allocated mbuf with
+ * m_copyback(), rcvif is set to the receiving interface and the mbuf is
+ * returned so the caller can pass it to if_input.  NULL is returned when the
+ * frame is too short, the metadata is inconsistent, the port is invalid, the
+ * interface is down or no mbuf could be allocated.
+ */
+static struct mbuf *
+sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
+{
+	struct nf_priv *nf_priv;
+	struct mbuf *m;
+	struct ifnet *ifp;
+	int np;
+	uint16_t dport, plen, magic;
+	device_t dev = adapter->dev;
+	uint8_t *indata = (uint8_t *)
+	    adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
+	    sizeof(struct nf_bb_desc);
+	struct nf_metadata *mdata = (struct nf_metadata *) indata;
+
+	/* The metadata header is 16 bytes. */
+	if (len < sizeof(struct nf_metadata)) {
+		device_printf(dev, "short frame (%u)\n", len);
+		adapter->packets_err++;
+		adapter->bytes_err += len;
+		return (NULL);
+	}
+
+	dport = le16toh(mdata->dport);
+	plen = le16toh(mdata->plen);
+	magic = le16toh(mdata->magic);
+
+	if (sizeof(struct nf_metadata) + plen > len ||
+	    magic != SUME_RIFFA_MAGIC) {
+		device_printf(dev, "corrupted packet (%zu + %d > %u || magic "
+		    "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
+		    len, magic, SUME_RIFFA_MAGIC);
+		return (NULL);
+	}
+
+	/*
+	 * We got the packet from one of the even bits.  ffs() returns 0 when
+	 * none of the port bits is set, which makes np -1, so both ends of
+	 * the range must be checked before np is used as an array index.
+	 */
+	np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
+	if (np < 0 || np >= SUME_NPORTS) {
+		device_printf(dev, "invalid destination port 0x%04x (%d)\n",
+		    dport, np);
+		adapter->packets_err++;
+		adapter->bytes_err += plen;
+		return (NULL);
+	}
+	ifp = adapter->ifp[np];
+	nf_priv = ifp->if_softc;
+	nf_priv->stats.rx_packets++;
+	nf_priv->stats.rx_bytes += plen;
+
+	/* If the interface is down, well, we are done. */
+	if (!(ifp->if_flags & IFF_UP)) {
+		nf_priv->stats.ifc_down_packets++;
+		nf_priv->stats.ifc_down_bytes += plen;
+		return (NULL);
+	}
+
+	if (adapter->sume_debug)
+		printf("Building mbuf with length: %d\n", plen);
+
+	m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
+	if (m == NULL) {
+		adapter->packets_err++;
+		adapter->bytes_err += plen;
+		return (NULL);
+	}
+
+	/* Copy the payload, which starts right after the metadata. */
+	m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
+	m->m_pkthdr.rcvif = ifp;
+
+	return (m);
+}
+
+/*
+ * SUME interrupt handler for when we get a valid interrupt from the board.
+ * Theoretically, we can receive interrupt for any of the available channels,
+ * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32
+ * bit number, using 5 bits for every channel, the least significant bits
+ * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector
+ * bits for RX/TX are:
+ * RX
+ * bit 0 - new transaction from SUME
+ * bit 1 - SUME received our bouncebuffer address
+ * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
+ * TX
+ * bit 3 - SUME received our bouncebuffer address
+ * bit 4 - SUME copied the data from our bouncebuffer, transaction done
+ *
+ * There are two finite state machines (one for TX, one for RX). We loop
+ * through channels 0 and 1 to check our current state and which interrupt
+ * bit is set.
+ * TX
+ * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
+ * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
+ * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
+ * to go to the next state.
+ * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
+ * our packet). Then we get the length of the sent data and go back to the
+ * IDLE state.
+ * RX
+ * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
+ * transaction). When we get it, we prepare our bouncebuffer for reading and
+ * trigger the SUME to start the transaction. Go to the next state.
+ * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
+ * bouncebuffer). Go to the next state.
+ * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
+ * we can build the mbuf and go back to the IDLE state.
+ */
+static void
+sume_intr_handler(void *arg)
+{
+ struct sume_adapter *adapter = arg;
+ uint32_t vect, vect0, len;
+ int ch, loops;
+ device_t dev = adapter->dev;
+ struct mbuf *m = NULL;
+ struct ifnet *ifp = NULL;
+ struct riffa_chnl_dir *send, *recv;
+
+ SUME_LOCK(adapter);
+
+ vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF); /* vector bits for both channels */
+ if ((vect0 & SUME_INVALID_VECT) != 0) { /* either of the two top bits set: ignore */
+ SUME_UNLOCK(adapter);
+ return;
+ }
+
+ /*
+ * We only have one interrupt for all channels and no way to
+ * quickly look up which channel(s) raised it, so check each one.
+ */
+ for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
+ vect = vect0 >> (5 * ch); /* 5 vector bits per channel */
+ send = adapter->send[ch];
+ recv = adapter->recv[ch];
+
+ loops = 0;
+ while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
+ loops <= 5) { /* NOTE(review): the RX loop below uses loops < 5 */
+ if (adapter->sume_debug)
+ device_printf(dev, "TX ch %d state %u vect = "
+ "0x%08x\n", ch, send->state, vect);
+ switch (send->state) {
+ case SUME_RIFFA_CHAN_STATE_IDLE:
+ break;
+ case SUME_RIFFA_CHAN_STATE_READY:
+ if (!(vect & SUME_MSI_TXBUF)) {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in send+3 state %u: "
+ "vect = 0x%08x\n", ch, send->state,
+ vect);
+ send->recovery = 1;
+ break;
+ }
+ send->state = SUME_RIFFA_CHAN_STATE_READ;
+ vect &= ~SUME_MSI_TXBUF;
+ break;
+ case SUME_RIFFA_CHAN_STATE_READ:
+ if (!(vect & SUME_MSI_TXDONE)) {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in send+4 state %u: "
+ "vect = 0x%08x\n", ch, send->state,
+ vect);
+ send->recovery = 1;
+ break;
+ }
+ send->state = SUME_RIFFA_CHAN_STATE_LEN;
+
+ len = read_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_RX_TNFR_LEN_REG_OFF)); /* the value read is not used on the TX path */
+ if (ch == SUME_RIFFA_CHANNEL_DATA) {
+ send->state =
+ SUME_RIFFA_CHAN_STATE_IDLE;
+ check_tx_queues(adapter);
+ } else if (ch == SUME_RIFFA_CHANNEL_REG)
+ wakeup(&send->event); /* state stays LEN for the register channel */
+ else {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in send+4 state %u: "
+ "vect = 0x%08x\n", ch, send->state,
+ vect);
+ send->recovery = 1;
+ }
+ vect &= ~SUME_MSI_TXDONE;
+ break;
+ case SUME_RIFFA_CHAN_STATE_LEN:
+ break;
+ default:
+ device_printf(dev, "unknown TX state!\n");
+ }
+ loops++;
+ }
+
+ if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
+ send->recovery) /* NOTE(review): message says "not in recovery" but this requires recovery != 0 */
+ device_printf(dev, "ch %d ignoring vect = 0x%08x "
+ "during TX; not in recovery; state = %d loops = "
+ "%d\n", ch, vect, send->state, loops);
+
+ loops = 0;
+ while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
+ SUME_MSI_RXDONE)) && loops < 5) {
+ if (adapter->sume_debug)
+ device_printf(dev, "RX ch %d state %u vect = "
+ "0x%08x\n", ch, recv->state, vect);
+ switch (recv->state) {
+ case SUME_RIFFA_CHAN_STATE_IDLE:
+ if (!(vect & SUME_MSI_RXQUE)) {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in recv+0 state %u: "
+ "vect = 0x%08x\n", ch, recv->state,
+ vect);
+ recv->recovery = 1;
+ break;
+ }
+ uint32_t max_ptr;
+
+ /* Clear recovery state. */
+ recv->recovery = 0;
+
+ /* Get offset and length. */
+ recv->offlast = read_reg(adapter,
+ RIFFA_CHNL_REG(ch,
+ RIFFA_TX_OFFLAST_REG_OFF));
+ recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_TX_LEN_REG_OFF));
+
+ /*
+ * Boundary checks.  Failures are only logged;
+ * the transaction is set up regardless.
+ */
+ max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
+ + SUME_RIFFA_OFFSET(recv->offlast)
+ + SUME_RIFFA_LEN(recv->len) - 1);
+ if (max_ptr <
+ (uint32_t)((uintptr_t)recv->buf_addr))
+ device_printf(dev, "receive buffer "
+ "wrap-around overflow.\n");
+ if (SUME_RIFFA_OFFSET(recv->offlast) +
+ SUME_RIFFA_LEN(recv->len) >
+ adapter->sg_buf_size)
+ device_printf(dev, "receive buffer too"
+ " small.\n");
+
+ /* Fill the bouncebuf "descriptor". */
+ sume_fill_bb_desc(adapter, recv,
+ SUME_RIFFA_LEN(recv->len));
+
+ bus_dmamap_sync(recv->ch_tag, recv->ch_map,
+ BUS_DMASYNC_PREREAD |
+ BUS_DMASYNC_PREWRITE);
+ write_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_TX_SG_ADDR_LO_REG_OFF),
+ SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
+ write_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_TX_SG_ADDR_HI_REG_OFF),
+ SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
+ write_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_TX_SG_LEN_REG_OFF),
+ 4 * recv->num_sg);
+ bus_dmamap_sync(recv->ch_tag, recv->ch_map,
+ BUS_DMASYNC_POSTREAD |
+ BUS_DMASYNC_POSTWRITE);
+
+ recv->state = SUME_RIFFA_CHAN_STATE_READY;
+ vect &= ~SUME_MSI_RXQUE;
+ break;
+ case SUME_RIFFA_CHAN_STATE_READY:
+ if (!(vect & SUME_MSI_RXBUF)) {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in recv+1 state %u: "
+ "vect = 0x%08x\n", ch, recv->state,
+ vect);
+ recv->recovery = 1;
+ break;
+ }
+ recv->state = SUME_RIFFA_CHAN_STATE_READ;
+ vect &= ~SUME_MSI_RXBUF;
+ break;
+ case SUME_RIFFA_CHAN_STATE_READ:
+ if (!(vect & SUME_MSI_RXDONE)) {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in recv+2 state %u: "
+ "vect = 0x%08x\n", ch, recv->state,
+ vect);
+ recv->recovery = 1;
+ break;
+ }
+ len = read_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_TX_TNFR_LEN_REG_OFF));
+
+ /* Remember, len and recv->len are words. */
+ if (ch == SUME_RIFFA_CHANNEL_DATA) {
+ m = sume_rx_build_mbuf(adapter,
+ len << 2); /* words to bytes */
+ recv->state =
+ SUME_RIFFA_CHAN_STATE_IDLE;
+ } else if (ch == SUME_RIFFA_CHANNEL_REG)
+ wakeup(&recv->event); /* state is left unchanged for the register channel */
+ else {
+ device_printf(dev, "ch %d unexpected "
+ "interrupt in recv+2 state %u: "
+ "vect = 0x%08x\n", ch, recv->state,
+ vect);
+ recv->recovery = 1;
+ }
+ vect &= ~SUME_MSI_RXDONE;
+ break;
+ case SUME_RIFFA_CHAN_STATE_LEN:
+ break;
+ default:
+ device_printf(dev, "unknown RX state!\n");
+ }
+ loops++;
+ }
+
+ if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
+ SUME_MSI_RXDONE)) && recv->recovery) {
+ device_printf(dev, "ch %d ignoring vect = 0x%08x "
+ "during RX; not in recovery; state = %d, loops = "
+ "%d\n", ch, vect, recv->state, loops);
+
+ /* Clean the unfinished transaction. */
+ if (ch == SUME_RIFFA_CHANNEL_REG &&
+ vect & SUME_MSI_RXDONE) {
+ read_reg(adapter, RIFFA_CHNL_REG(ch,
+ RIFFA_TX_TNFR_LEN_REG_OFF));
+ recv->recovery = 0;
+ }
+ }
+ }
+ SUME_UNLOCK(adapter);
+
+ if (m != NULL) { /* deliver the packet after the adapter lock is dropped */
+ ifp = m->m_pkthdr.rcvif;
+ (*ifp->if_input)(ifp, m);
+ }
+}
+
+/*
+ * Interrupt filter.  Interrupt generation cannot be disabled on this
+ * hardware, so anything that arrives before the adapter has been marked
+ * 'running' is reported as stray; once running, the threaded handler is
+ * scheduled.
+ */
+static int
+sume_intr_filter(void *arg)
+{
+	struct sume_adapter *sc = arg;
+
+	return (sc->running != 0 ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
+}
+
+/*
+ * Set up the PCI side of the adapter: enable bus mastering, map BAR0 (which
+ * must be exactly 1024 bytes long), allocate the MSI interrupt and install
+ * the filter/handler pair, tweak the PCIe device/link control registers and
+ * finally decode and validate the RIFFA info register.
+ *
+ * Returns 0 on success or an errno value.  Nothing acquired here is released
+ * on failure; sume_attach() calls sume_detach() for that.
+ */
+static int
+sume_probe_riffa_pci(struct sume_adapter *adapter)
+{
+	device_t dev = adapter->dev;
+	uint32_t reg, nchnls, busmaster, link_width, link_rate, bus_width;
+	uint32_t devctl, devctl2, linkctl;
+	int capmem, count, error;
+
+	pci_enable_busmaster(dev);
+
+	/* Map the register window. */
+	adapter->rid = PCIR_BAR(0);
+	adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+	    &adapter->rid, RF_ACTIVE);
+	if (adapter->bar0_addr == NULL) {
+		device_printf(dev, "unable to allocate bus resource: "
+		    "BAR0 address\n");
+		return (ENXIO);
+	}
+	adapter->bt = rman_get_bustag(adapter->bar0_addr);
+	adapter->bh = rman_get_bushandle(adapter->bar0_addr);
+	adapter->bar0_len = rman_get_size(adapter->bar0_addr);
+	if (adapter->bar0_len != 1024) {
+		device_printf(dev, "BAR0 resource length %lu != 1024\n",
+		    adapter->bar0_len);
+		return (ENXIO);
+	}
+
+	/* Allocate MSI and hook up the interrupt. */
+	count = pci_msi_count(dev);
+	error = pci_alloc_msi(dev, &count);
+	if (error != 0) {
+		device_printf(dev, "unable to allocate bus resource: PCI "
+		    "MSI\n");
+		return (error);
+	}
+
+	adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
+	adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+	    &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
+	if (adapter->irq.res == NULL) {
+		device_printf(dev, "unable to allocate bus resource: IRQ "
+		    "memory\n");
+		return (ENXIO);
+	}
+
+	error = bus_setup_intr(dev, adapter->irq.res,
+	    INTR_MPSAFE | INTR_TYPE_NET, sume_intr_filter, sume_intr_handler,
+	    adapter, &adapter->irq.tag);
+	if (error != 0) {
+		device_printf(dev, "failed to setup interrupt for rid %d, name"
+		    " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
+		return (ENXIO);
+	}
+
+	/* PCIe capability tweaks. */
+	if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
+		device_printf(dev, "PCI not PCIe capable\n");
+		return (ENXIO);
+	}
+
+	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
+	pci_write_config(dev, capmem + PCIER_DEVICE_CTL,
+	    (devctl | PCIEM_CTL_EXT_TAG_FIELD), 2);
+
+	devctl2 = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
+	pci_write_config(dev, capmem + PCIER_DEVICE_CTL2,
+	    (devctl2 | PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);
+
+	linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
+	pci_write_config(dev, capmem + PCIER_LINK_CTL,
+	    (linkctl | PCIEM_LINK_CTL_RCB), 2);
+
+	/* Decode the RIFFA info register once. */
+	reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
+	nchnls = reg & 0xf;
+	busmaster = (reg >> 4) & 0x1;
+	link_width = (reg >> 5) & 0x3f;
+	link_rate = (reg >> 11) & 0x3;
+	bus_width = (reg >> 19) & 0xf;
+
+	adapter->num_sg = RIFFA_SG_ELEMS * bus_width;
+	adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * bus_width;
+
+	/* Check bus master is enabled. */
+	if (busmaster != 1) {
+		device_printf(dev, "bus master not enabled: %d\n", busmaster);
+		return (ENODEV);
+	}
+	/* Check link parameters are valid. */
+	if (link_width == 0 || link_rate == 0) {
+		device_printf(dev, "link parameters not valid: %d %d\n",
+		    link_width, link_rate);
+		return (ENODEV);
+	}
+	/* Check # of channels are within valid range. */
+	if (nchnls == 0 || nchnls > RIFFA_MAX_CHNLS) {
+		device_printf(dev, "number of channels out of range: %d\n",
+		    nchnls);
+		return (ENODEV);
+	}
+	/* Check bus width. */
+	if (bus_width == 0 || bus_width > RIFFA_MAX_BUS_WIDTH_PARAM) {
+		device_printf(dev, "bus width out of range: %d\n", bus_width);
+		return (ENODEV);
+	}
+
+	device_printf(dev, "[riffa] # of channels: %d\n", nchnls);
+	device_printf(dev, "[riffa] bus interface width: %d\n",
+	    bus_width << 5);
+	device_printf(dev, "[riffa] bus master enabled: %d\n", busmaster);
+	device_printf(dev, "[riffa] negotiated link width: %d\n", link_width);
+	device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
+	    link_rate * 2500);
+	device_printf(dev, "[riffa] max downstream payload: %d B\n",
+	    128 << ((reg >> 13) & 0x7));
+	device_printf(dev, "[riffa] max upstream payload: %d B\n",
+	    128 << ((reg >> 16) & 0x7));
+
+	return (0);
+}
+
+/*
+ * Intentionally empty if_init callback.  If there is no sume_if_init, the
+ * ether_ioctl panics, so this stub is what sume_ifp_alloc() installs as
+ * ifp->if_init.  The argument is ignored.
+ */
+static void
+sume_if_init(void *sc)
+{
+}
+
+/*
+ * Fill in the descriptor located at the start of the channel's bounce buffer
+ * for an incoming / outgoing transaction: the bus address of the payload
+ * (which immediately follows the descriptor), split into its lower and upper
+ * 32 bits, and the length.  'len' is stored shifted right by two, presumably
+ * converting bytes to 32-bit words.  'adapter' is unused.
+ */
+static void
+sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
+    uint64_t len)
+{
+	struct nf_bb_desc *desc;
+	uint64_t payload_addr;
+
+	desc = (struct nf_bb_desc *) p->buf_addr;
+	payload_addr = p->buf_hw_addr + sizeof(struct nf_bb_desc);
+
+	desc->lower = payload_addr;
+	desc->upper = payload_addr >> 32;
+	desc->len = len >> 2;
+}
+
+/*
+ * Kick off the DMA of a register request that has already been staged in the
+ * register channel's send bounce buffer (send->len is in words).  The only
+ * visible caller, sume_module_reg_write(), holds the adapter lock around
+ * this call.  Always returns 0.
+ */
+static int
+sume_modreg_write_locked(struct sume_adapter *adapter)
+{
+	const int ch = SUME_RIFFA_CHANNEL_REG;
+	struct riffa_chnl_dir *tx = adapter->send[ch];
+
+	/* Announce the transfer (offset/last flags and length in words). */
+	write_reg(adapter, RIFFA_CHNL_REG(ch, RIFFA_RX_OFFLAST_REG_OFF),
+	    SUME_OFFLAST);
+	write_reg(adapter, RIFFA_CHNL_REG(ch, RIFFA_RX_LEN_REG_OFF), tx->len);
+
+	/* Fill the bouncebuf "descriptor". */
+	sume_fill_bb_desc(adapter, tx, SUME_RIFFA_LEN(tx->len));
+
+	/* Update the state before initiating the DMA to avoid races. */
+	tx->state = SUME_RIFFA_CHAN_STATE_READY;
+
+	bus_dmamap_sync(tx->ch_tag, tx->ch_map,
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+	/* Hand the scatter-gather address and length to the FPGA. */
+	write_reg(adapter, RIFFA_CHNL_REG(ch, RIFFA_RX_SG_ADDR_LO_REG_OFF),
+	    SUME_RIFFA_LO_ADDR(tx->buf_hw_addr));
+	write_reg(adapter, RIFFA_CHNL_REG(ch, RIFFA_RX_SG_ADDR_HI_REG_OFF),
+	    SUME_RIFFA_HI_ADDR(tx->buf_hw_addr));
+	write_reg(adapter, RIFFA_CHNL_REG(ch, RIFFA_RX_SG_LEN_REG_OFF),
+	    4 * tx->num_sg);
+
+	bus_dmamap_sync(tx->ch_tag, tx->ch_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	return (0);
+}
+
+/*
+ * Request a register read or write (depending on optype).
+ * If optype is set (0x1f) this will result in a register write,
+ * otherwise this will result in a register read request at the given
+ * address and the result will need to be DMAed back.
+ *
+ * Steps:
+ *  1. Make sure the channel is free; otherwise return EBUSY.
+ *  2. Prepare the request in the bounce buffer (always used for regs).
+ *  3. Start the DMA.
+ *  4. Sleep (at most 1s) until the send side reaches the LEN state.
+ *
+ * Returns 0 on success, EBUSY if the channel is in use, EFAULT if the DMA
+ * could not be started, or EWOULDBLOCK if a read request's wait failed.
+ * A write request returns 0 even if the wait failed.
+ */
+static int
+sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
+    uint32_t optype)
+{
+	struct sume_adapter *adapter = nf_priv->adapter;
+	struct riffa_chnl_dir *tx = adapter->send[SUME_RIFFA_CHANNEL_REG];
+	struct nf_regop_data *req;
+	int error;
+
+	SUME_LOCK(adapter);
+
+	if (tx->state != SUME_RIFFA_CHAN_STATE_IDLE) {
+		SUME_UNLOCK(adapter);
+		return (EBUSY);
+	}
+
+	/* The request lives right behind the bounce buffer descriptor. */
+	req = (struct nf_regop_data *) (tx->buf_addr +
+	    sizeof(struct nf_bb_desc));
+	req->addr = htole32(sifr->addr);
+	req->val = htole32(sifr->val);
+	/* Tag to identify request. */
+	req->rtag = htole32(++tx->rtag);
+	req->optype = htole32(optype);
+	tx->len = sizeof(struct nf_regop_data) / 4; /* words */
+
+	if (sume_modreg_write_locked(adapter) != 0) {
+		SUME_UNLOCK(adapter);
+		return (EFAULT);
+	}
+
+	/* Timeout after 1s. */
+	error = 0;
+	if (tx->state != SUME_RIFFA_CHAN_STATE_LEN)
+		error = msleep(&tx->event, &adapter->lock, 0,
+		    "Waiting recv finish", 1 * hz);
+
+	/*
+	 * A successful read request keeps the channel busy: the state is
+	 * only reset once the result has been read, which avoids having
+	 * two outstanding transactions (the alternative would be a queue
+	 * plus tag validation, a lot of work for a low priority, infrequent
+	 * event).  In every other case (a write, or a failed wait) the
+	 * channel is released here.
+	 */
+	if (optype != SUME_MR_READ || error != 0) {
+		tx->state = SUME_RIFFA_CHAN_STATE_IDLE;
+		error = (optype == SUME_MR_READ) ? EWOULDBLOCK : 0;
+	}
+
+	SUME_UNLOCK(adapter);
+
+	return (error);
+}
+
+/*
+ * Module register read: collect the reply to a previously issued read
+ * request.
+ *
+ *  0. Sleep (at most 1s) until the receive side reaches the READ state,
+ *     unless it is there already.
+ *  1. Read the DMA result into sifr->val.
+ *  2. Set both the RX and the *TX* state to IDLE so the next request can
+ *     start.
+ *
+ * Returns 0 on success or EWOULDBLOCK if the reply did not arrive.  A tag
+ * mismatch is only logged.
+ */
+static int
+sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
+{
+	struct sume_adapter *adapter = nf_priv->adapter;
+	struct riffa_chnl_dir *rx = adapter->recv[SUME_RIFFA_CHANNEL_REG];
+	struct riffa_chnl_dir *tx = adapter->send[SUME_RIFFA_CHANNEL_REG];
+	struct nf_regop_data *reply;
+	uint32_t rtag;
+	int error = 0;
+
+	SUME_LOCK(adapter);
+
+	bus_dmamap_sync(rx->ch_tag, rx->ch_map,
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+	/*
+	 * We only need to be woken up at the end of the transaction.
+	 * Timeout after 1s.
+	 */
+	if (rx->state != SUME_RIFFA_CHAN_STATE_READ)
+		error = msleep(&rx->event, &adapter->lock, 0,
+		    "Waiting transaction finish", 1 * hz);
+
+	if (error == EWOULDBLOCK || rx->state != SUME_RIFFA_CHAN_STATE_READ) {
+		SUME_UNLOCK(adapter);
+		device_printf(adapter->dev, "wait error: %d\n", error);
+		return (EWOULDBLOCK);
+	}
+
+	bus_dmamap_sync(rx->ch_tag, rx->ch_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/*
+	 * The reply sits right behind the bounce buffer descriptor.  The
+	 * send side's tag is stable here because the state machine keeps
+	 * the channel busy until we release it below.
+	 */
+	reply = (struct nf_regop_data *) (rx->buf_addr +
+	    sizeof(struct nf_bb_desc));
+
+	rtag = le32toh(reply->rtag);
+	if (rtag != tx->rtag)
+		device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
+		    rtag, tx->rtag);
+
+	sifr->val = le32toh(reply->val);
+
+	/* We are done; release both directions. */
+	rx->state = SUME_RIFFA_CHAN_STATE_IDLE;
+	tx->state = SUME_RIFFA_CHAN_STATE_IDLE;
+
+	SUME_UNLOCK(adapter);
+
+	return (0);
+}
+
+/*
+ * Read a module register: issue the read request for sifr->addr and, if that
+ * succeeded, fetch the reply into sifr->val.  Returns 0 or the errno of the
+ * step that failed.
+ */
+static int
+get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
+{
+	int error;
+
+	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
+	if (error != 0)
+		return (error);
+
+	return (sume_module_reg_read(nf_priv, sifr));
+}
+
+/*
+ * Interface ioctl handler.  Media queries go to ifmedia_ioctl(), the two
+ * SUME register commands copy a struct sume_ifreq from/to userland (copy
+ * failures are reported as EINVAL), and everything else is passed on to
+ * ether_ioctl().
+ */
+static int
+sume_if_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
+{
+	struct nf_priv *nf_priv = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *) data;
+	struct sume_ifreq sifr;
+	int error = 0;
+
+	switch (cmd) {
+	case SIOCGIFMEDIA:
+	case SIOCGIFXMEDIA:
+		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
+		break;
+
+	case SUME_IOCTL_CMD_WRITE_REG:
+		if (copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr)) != 0) {
+			error = EINVAL;
+			break;
+		}
+		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
+		break;
+
+	case SUME_IOCTL_CMD_READ_REG:
+		if (copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr)) != 0) {
+			error = EINVAL;
+			break;
+		}
+		error = get_modreg_value(nf_priv, &sifr);
+		if (error == 0 &&
+		    copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr)) != 0)
+			error = EINVAL;
+		break;
+
+	case SIOCSIFFLAGS:
+		/* Silence tcpdump 'promisc mode not supported' warning. */
+		if (ifp->if_flags & IFF_PROMISC)
+			break;
+		/* FALLTHROUGH */
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * ifmedia change callback: reject anything that is not Ethernet media and
+ * otherwise update the interface baudrate from the selected media word.
+ */
+static int
+sume_media_change(struct ifnet *ifp)
+{
+	struct nf_priv *nf_priv = ifp->if_softc;
+	struct ifmedia *ifm = &nf_priv->media;
+	int media;
+
+	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
+		return (EINVAL);
+
+	media = (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR) ?
+	    (IFM_ETHER | IFM_10G_SR) : ifm->ifm_media;
+	ifp->if_baudrate = ifmedia_baudrate(media);
+
+	return (0);
+}
+
+/*
+ * Read the port's status register and, if the link state differs from the
+ * cached nf_priv->link_up, update the cache and notify the network stack.
+ * Does nothing if the register could not be read.
+ */
+static void
+sume_update_link_status(struct ifnet *ifp)
+{
+	struct nf_priv *nf_priv = ifp->if_softc;
+	struct sume_adapter *adapter = nf_priv->adapter;
+	struct sume_ifreq sifr;
+	int link_status;
+
+	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
+	sifr.val = 0;
+
+	if (get_modreg_value(nf_priv, &sifr) != 0)
+		return;
+
+	link_status = SUME_LINK_STATUS(sifr.val);
+
+	if (link_status) {
+		/* Link is up; nothing to do unless we thought it was down. */
+		if (nf_priv->link_up)
+			return;
+
+		nf_priv->link_up = 1;
+		if_link_state_change(ifp, LINK_STATE_UP);
+		if (adapter->sume_debug)
+			device_printf(adapter->dev, "port %d link state "
+			    "changed to UP\n", nf_priv->unit);
+	} else {
+		/* Link is down; nothing to do unless we thought it was up. */
+		if (!nf_priv->link_up)
+			return;
+
+		if_link_state_change(ifp, LINK_STATE_DOWN);
+		nf_priv->link_up = 0;
+		if (adapter->sume_debug)
+			device_printf(adapter->dev, "port %d link state "
+			    "changed to DOWN\n", nf_priv->unit);
+	}
+}
+
+/*
+ * ifmedia status callback: report the currently selected media as active,
+ * mark the status valid, refresh the link state from the hardware and set
+ * IFM_ACTIVE if the link is up.
+ */
+static void
+sume_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	struct nf_priv *nf_priv = ifp->if_softc;
+	struct ifmedia *ifm = &nf_priv->media;
+
+	/*
+	 * Whether or not the current media is 10G-SR on an UP interface,
+	 * the value reported is the current media word.
+	 */
+	ifmr->ifm_active = ifm->ifm_cur->ifm_media;
+	ifmr->ifm_status |= IFM_AVALID;
+
+	sume_update_link_status(ifp);
+
+	if (nf_priv->link_up)
+		ifmr->ifm_status |= IFM_ACTIVE;
+}
+
+/*
+ * Packet to transmit. We take the packet data from the mbuf and copy it to the
+ * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
+ * packet data are for metadata: sport/dport (depending on our source
+ * interface), packet length and magic 0xcafe. We tell the SUME about the
+ * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
+ * the information about the start and length of the packet and trigger the
+ * transaction.
+ *
+ * Must be called with the adapter lock held and the data send channel IDLE.
+ *
+ * Returns 0 if a transfer was started, EINVAL if the interface send queue
+ * was empty, or ENOMEM if the dequeued packet does not fit into the bounce
+ * buffer (the packet is dropped and counted in tx_dropped).
+ */
+static int
+sume_if_start_locked(struct ifnet *ifp)
+{
+	struct mbuf *m;
+	struct nf_priv *nf_priv = ifp->if_softc;
+	struct sume_adapter *adapter = nf_priv->adapter;
+	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
+	uint8_t *outbuf;
+	struct nf_metadata *mdata;
+	size_t needed;
+	int plen = SUME_MIN_PKT_SIZE;
+
+	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
+	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
+	    ("SUME not in IDLE state"));
+
+	IFQ_DEQUEUE(&ifp->if_snd, m);
+	if (m == NULL)
+		return (EINVAL);
+
+	/* Packets large enough do not need to be padded */
+	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
+		plen = m->m_pkthdr.len;
+
+	if (adapter->sume_debug)
+		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
+		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
+
+	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
+	mdata = (struct nf_metadata *) outbuf;
+
+	/* Clear the recovery flag. */
+	send->recovery = 0;
+
+	/*
+	 * Make sure everything fits into the bounce buffer: the descriptor
+	 * at its start, the 16 bytes of nf_metadata and the (padded) packet.
+	 * The payload is written behind the descriptor, so leaving the
+	 * descriptor out of the sum would allow an overrun of up to
+	 * sizeof(struct nf_bb_desc) bytes.
+	 */
+	needed = sizeof(struct nf_bb_desc) + sizeof(struct nf_metadata) + plen;
+	if (needed > adapter->sg_buf_size) {
+		device_printf(adapter->dev, "packet too big for bounce buffer "
+		    "(%d)\n", m->m_pkthdr.len);
+		m_freem(m);
+		nf_priv->stats.tx_dropped++;
+		return (ENOMEM);
+	}
+
+	bus_dmamap_sync(send->ch_tag, send->ch_map,
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+	/* Zero out the padded data */
+	if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
+		bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
+	/* Skip the first 16 bytes for the metadata. */
+	m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
+	/* Transfer length in 32-bit words, rounded up. */
+	send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;
+
+	/* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
+	mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
+	mdata->dport = htole16(1 << (nf_priv->port * 2));
+	mdata->plen = htole16(plen);
+	mdata->magic = htole16(SUME_RIFFA_MAGIC);
+	mdata->t1 = htole32(0);
+	mdata->t2 = htole32(0);
+
+	/* Let the FPGA know about the transfer. */
+	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
+	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
+	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
+	    RIFFA_RX_LEN_REG_OFF), send->len);
+
+	/* Fill the bouncebuf "descriptor". */
+	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
+
+	/* Update the state before initiating the DMA to avoid races. */
+	send->state = SUME_RIFFA_CHAN_STATE_READY;
+
+	/* DMA. */
+	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
+	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
+	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
+	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
+	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
+	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
+	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
+	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
+
+	bus_dmamap_sync(send->ch_tag, send->ch_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	nf_priv->stats.tx_packets++;
+	nf_priv->stats.tx_bytes += plen;
+
+	/* We can free as long as we use the bounce buffer. */
+	m_freem(m);
+
+	/* Remembered so check_tx_queues() starts with the next port. */
+	adapter->last_ifc = nf_priv->port;
+
+	/* Reset watchdog counter. */
+	adapter->wd_counter = 0;
+
+	return (0);
+}
+
+/*
+ * if_start callback: if the adapter is running, the interface is UP and the
+ * data send channel is idle, start transmitting the next queued packet.
+ */
+static void
+sume_if_start(struct ifnet *ifp)
+{
+	struct nf_priv *nf_priv = ifp->if_softc;
+	struct sume_adapter *adapter = nf_priv->adapter;
+	struct riffa_chnl_dir *tx;
+
+	if (adapter->running == 0 || (ifp->if_flags & IFF_UP) == 0)
+		return;
+
+	SUME_LOCK(adapter);
+	tx = adapter->send[SUME_RIFFA_CHANNEL_DATA];
+	if (tx->state == SUME_RIFFA_CHAN_STATE_IDLE)
+		sume_if_start_locked(ifp);
+	SUME_UNLOCK(adapter);
+}
+
+/*
+ * Called at the end of every TX transaction to look for packets still queued
+ * on any UP interface.  Ports are visited round-robin, beginning with the one
+ * after the port that transmitted last, and the scan stops as soon as one
+ * transfer has been started.  The adapter lock must be held.
+ */
+static void
+check_tx_queues(struct sume_adapter *adapter)
+{
+	struct ifnet *ifp;
+	int first, n;
+
+	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
+
+	first = adapter->last_ifc + 1;
+
+	/* Check all interfaces */
+	for (n = 0; n < SUME_NPORTS; n++) {
+		ifp = adapter->ifp[(first + n) % SUME_NPORTS];
+
+		if ((ifp->if_flags & IFF_UP) == 0)
+			continue;
+
+		if (sume_if_start_locked(ifp) == 0)
+			break;
+	}
+}
+
+/*
+ * Allocate and attach the network interface for the given port: create the
+ * ifnet and its private nf_priv state, pick a unit number, install the
+ * driver callbacks, attach it as an Ethernet interface (the last byte of the
+ * default MAC address is replaced by the unit number) and register 10G-SR as
+ * the only media.
+ *
+ * Returns 0 on success or ENOMEM if the ifnet could not be allocated.
+ */
+static int
+sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
+{
+	uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
+	struct nf_priv *nf_priv;
+	struct ifnet *ifp;
+
+	/*
+	 * Allocate the ifnet first: the M_WAITOK allocation below cannot
+	 * fail, so there is no path on which nf_priv could be leaked.
+	 */
+	ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		device_printf(adapter->dev, "cannot allocate ifnet\n");
+		return (ENOMEM);
+	}
+	nf_priv = malloc(sizeof(struct nf_priv), M_SUME, M_ZERO | M_WAITOK);
+
+	adapter->ifp[port] = ifp;
+	ifp->if_softc = nf_priv;
+
+	nf_priv->adapter = adapter;
+	nf_priv->unit = alloc_unr(unr);
+	nf_priv->port = port;
+	nf_priv->link_up = 0;
+
+	if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+
+	ifp->if_init = sume_if_init;
+	ifp->if_start = sume_if_start;
+	ifp->if_ioctl = sume_if_ioctl;
+
+	hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
+	ether_ifattach(ifp, hw_addr);
+
+	ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
+	    sume_media_status);
+	ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
+	ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);
+
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+
+	return (0);
+}
+
+/*
+ * bus_dmamap_load() callback: on success store the bus address of the single
+ * segment in the bus_addr_t that 'arg' points to.  On error 'arg' is left
+ * untouched.
+ */
+static void
+callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
+{
+	if (err == 0) {
+		KASSERT(nseg == 1, ("%d segments returned!", nseg));
+
+		*(bus_addr_t *) arg = segs[0].ds_addr;
+	}
+}
+
+/*
+ * Allocate the per-channel state for one direction ("recv" or "send", passed
+ * in 'dir' for messages only) and store the array in *p.  Every channel gets
+ * its own DMA tag and a single-segment bounce buffer of adapter->sg_buf_size
+ * bytes, loaded so that its bus address is known.
+ *
+ * Returns 0 on success or an errno value.  Partially set up state is left in
+ * place on failure; callers are expected to clean up via
+ * sume_remove_riffa_buffers().
+ */
+static int
+sume_probe_riffa_buffer(const struct sume_adapter *adapter,
+    struct riffa_chnl_dir ***p, const char *dir)
+{
+	device_t dev = adapter->dev;
+	struct riffa_chnl_dir **rp, *chnl;
+	bus_addr_t hw_addr;
+	int ch, error;
+
+	*p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
+	    M_SUME, M_ZERO | M_WAITOK);
+	if (*p == NULL) {
+		device_printf(dev, "malloc(%s) failed.\n", dir);
+		return (ENOMEM);
+	}
+	rp = *p;
+
+	/* Allocate the chnl_dir structs themselves. */
+	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
+		/* One direction. */
+		chnl = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
+		    M_ZERO | M_WAITOK);
+		rp[ch] = chnl;
+		if (chnl == NULL) {
+			device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
+			    "failed.\n", dir, ch);
+			return (ENOMEM);
+		}
+
+		/* 4-byte aligned, one segment of sg_buf_size bytes. */
+		error = bus_dma_tag_create(bus_get_dma_tag(dev),
+		    4, 0,
+		    BUS_SPACE_MAXADDR,
+		    BUS_SPACE_MAXADDR,
+		    NULL, NULL,
+		    adapter->sg_buf_size,
+		    1,
+		    adapter->sg_buf_size,
+		    0,
+		    NULL,
+		    NULL,
+		    &chnl->ch_tag);
+		if (error != 0) {
+			device_printf(dev, "bus_dma_tag_create(%s[%d]) "
+			    "failed.\n", dir, ch);
+			return (error);
+		}
+
+		error = bus_dmamem_alloc(chnl->ch_tag,
+		    (void **) &chnl->buf_addr,
+		    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
+		    &chnl->ch_map);
+		if (error != 0) {
+			device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
+			    dir, ch);
+			return (error);
+		}
+
+		bzero(chnl->buf_addr, adapter->sg_buf_size);
+
+		/* callback_dma() stores the bus address in hw_addr. */
+		error = bus_dmamap_load(chnl->ch_tag, chnl->ch_map,
+		    chnl->buf_addr, adapter->sg_buf_size, callback_dma,
+		    &hw_addr, BUS_DMA_NOWAIT);
+		if (error != 0) {
+			device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
+			    dir, ch);
+			return (error);
+		}
+
+		chnl->buf_hw_addr = hw_addr;
+		chnl->num_sg = 1;
+		chnl->state = SUME_RIFFA_CHAN_STATE_IDLE;
+		chnl->rtag = SUME_INIT_RTAG;
+	}
+
+	return (0);
+}
+
+/*
+ * Set up the bounce buffers for both directions, receive first.  Returns 0
+ * or the errno of the first direction that failed.
+ */
+static int
+sume_probe_riffa_buffers(struct sume_adapter *adapter)
+{
+	int error;
+
+	error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
+	if (error == 0)
+		error = sume_probe_riffa_buffer(adapter, &adapter->send,
+		    "send");
+
+	return (error);
+}
+
+/*
+ * Register the driver's sysctls below the device's sysctl tree: a "sume"
+ * node, the "debug" knob, the adapter-wide RX error counters and one node
+ * per allocated interface holding its statistics.  Note that "debug", the
+ * error counters and the per-interface nodes are all added to the device's
+ * own child list, not below the "sume" node.  Stops silently (after logging)
+ * if a node cannot be created.
+ */
+static void
+sume_sysctl_init(struct sume_adapter *adapter)
+{
+	device_t dev = adapter->dev;
+	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
+	struct sysctl_oid_list *dev_list, *ifc_list;
+	struct sysctl_oid *node;
+	struct nf_priv *nf_priv;
+	struct ifnet *ifp;
+	char namebuf[MAX_IFC_NAME_LEN];
+	int i;
+
+	dev_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	node = SYSCTL_ADD_NODE(ctx, dev_list, OID_AUTO, "sume", CTLFLAG_RW,
+	    0, "SUME top-level tree");
+	if (node == NULL) {
+		device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
+		return;
+	}
+	SYSCTL_ADD_INT(ctx, dev_list, OID_AUTO, "debug", CTLFLAG_RW,
+	    &adapter->sume_debug, 0, "debug int leaf");
+
+	/* total RX error stats */
+	SYSCTL_ADD_U64(ctx, dev_list, OID_AUTO, "rx_epkts",
+	    CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
+	SYSCTL_ADD_U64(ctx, dev_list, OID_AUTO, "rx_ebytes",
+	    CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");
+
+	/* Per-interface statistics, highest port first. */
+	for (i = SUME_NPORTS - 1; i >= 0; i--) {
+		ifp = adapter->ifp[i];
+		if (ifp == NULL)
+			continue;
+		nf_priv = ifp->if_softc;
+
+		snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
+		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
+		node = SYSCTL_ADD_NODE(ctx, dev_list, OID_AUTO, namebuf,
+		    CTLFLAG_RW, 0, "SUME ifc tree");
+		if (node == NULL) {
+			device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
+			return;
+		}
+		ifc_list = SYSCTL_CHILDREN(node);
+
+		/* Packets dropped by down interface. */
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "ifc_down_bytes",
+		    CTLFLAG_RD, &nf_priv->stats.ifc_down_bytes, 0,
+		    "ifc_down bytes");
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "ifc_down_packets",
+		    CTLFLAG_RD, &nf_priv->stats.ifc_down_packets, 0,
+		    "ifc_down packets");
+
+		/* HW RX stats */
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "hw_rx_packets",
+		    CTLFLAG_RD, &nf_priv->stats.hw_rx_packets, 0,
+		    "hw_rx packets");
+
+		/* HW TX stats */
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "hw_tx_packets",
+		    CTLFLAG_RD, &nf_priv->stats.hw_tx_packets, 0,
+		    "hw_tx packets");
+
+		/* RX stats */
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "rx_bytes",
+		    CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0, "rx bytes");
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "rx_dropped",
+		    CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0, "rx dropped");
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "rx_packets",
+		    CTLFLAG_RD, &nf_priv->stats.rx_packets, 0, "rx packets");
+
+		/* TX stats */
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "tx_bytes",
+		    CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0, "tx bytes");
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "tx_dropped",
+		    CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0, "tx dropped");
+		SYSCTL_ADD_U64(ctx, ifc_list, OID_AUTO, "tx_packets",
+		    CTLFLAG_RD, &nf_priv->stats.tx_packets, 0, "tx packets");
+	}
+}
+
+/*
+ * Once-a-second callout: queue the statistics task and act as TX watchdog.
+ * If the data send channel has been non-idle for three consecutive ticks it
+ * is forced back to IDLE and the TX queues are rescanned.  The callout
+ * re-arms itself as long as the adapter is running.
+ */
+static void
+sume_local_timer(void *arg)
+{
+	struct sume_adapter *adapter = arg;
+	struct riffa_chnl_dir *tx;
+
+	if (adapter->running == 0)
+		return;
+
+	taskqueue_enqueue(adapter->tq, &adapter->stat_task);
+
+	SUME_LOCK(adapter);
+	tx = adapter->send[SUME_RIFFA_CHANNEL_DATA];
+	if (tx->state != SUME_RIFFA_CHAN_STATE_IDLE) {
+		adapter->wd_counter++;
+		if (adapter->wd_counter >= 3) {
+			/* Resetting interfaces if stuck for 3 seconds. */
+			device_printf(adapter->dev,
+			    "TX stuck, resetting adapter.\n");
+			read_reg(adapter, RIFFA_INFO_REG_OFF);
+
+			tx->state = SUME_RIFFA_CHAN_STATE_IDLE;
+			adapter->wd_counter = 0;
+
+			check_tx_queues(adapter);
+		}
+	}
+	SUME_UNLOCK(adapter);
+
+	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
+}
+
+/*
+ * Statistics task: for every UP interface refresh the link state and add the
+ * values read from the hardware RX and TX counter registers to the running
+ * totals.  A counter whose register read fails is skipped.  'pending' is
+ * unused.
+ */
+static void
+sume_get_stats(void *context, int pending)
+{
+	struct sume_adapter *adapter = context;
+	struct nf_priv *nf_priv;
+	struct sume_ifreq sifr;
+	struct ifnet *ifp;
+	int i;
+
+	for (i = 0; i < SUME_NPORTS; i++) {
+		ifp = adapter->ifp[i];
+		if ((ifp->if_flags & IFF_UP) == 0)
+			continue;
+		nf_priv = ifp->if_softc;
+
+		sume_update_link_status(ifp);
+
+		/* Get RX counter. */
+		sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
+		sifr.val = 0;
+		if (get_modreg_value(nf_priv, &sifr) == 0)
+			nf_priv->stats.hw_rx_packets += sifr.val;
+
+		/* Get TX counter. */
+		sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
+		sifr.val = 0;
+		if (get_modreg_value(nf_priv, &sifr) == 0)
+			nf_priv->stats.hw_tx_packets += sifr.val;
+	}
+}
+
+/*
+ * Device attach: set up the PCI/RIFFA side, the DMA bounce buffers, one
+ * network interface per port, the sysctls and the statistics task queue,
+ * then mark the adapter running and start the once-a-second timer.
+ *
+ * Returns 0 on success.  On any failure everything is undone through
+ * sume_detach() and the errno value is returned.
+ */
+static int
+sume_attach(device_t dev)
+{
+	struct sume_adapter *adapter = device_get_softc(dev);
+	int error, i;
+
+	adapter->dev = dev;
+	adapter->running = 0;
+
+	mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);
+
+	/*
+	 * Initialize the callout and the task before anything can fail, so
+	 * that the sume_detach() on the error path always drains an
+	 * initialized callout.
+	 */
+	callout_init(&adapter->timer, 1);
+	TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);
+
+	/* OK finish up RIFFA. */
+	error = sume_probe_riffa_pci(adapter);
+	if (error != 0)
+		goto error;
+
+	error = sume_probe_riffa_buffers(adapter);
+	if (error != 0)
+		goto error;
+
+	/* Now do the network interfaces. */
+	for (i = 0; i < SUME_NPORTS; i++) {
+		error = sume_ifp_alloc(adapter, i);
+		if (error != 0)
+			goto error;
+	}
+
+	/* Register stats and register sysctls. */
+	sume_sysctl_init(adapter);
+
+	/* An M_NOWAIT allocation may fail, so the result must be checked. */
+	adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
+	    taskqueue_thread_enqueue, &adapter->tq);
+	if (adapter->tq == NULL) {
+		device_printf(dev, "cannot create stats taskqueue\n");
+		error = ENOMEM;
+		goto error;
+	}
+	error = taskqueue_start_threads(&adapter->tq, 1, PI_NET,
+	    "%s stattaskq", device_get_nameunit(adapter->dev));
+	if (error != 0) {
+		device_printf(dev, "cannot start stats taskqueue thread\n");
+		goto error;
+	}
+
+	/* Reset the HW. */
+	read_reg(adapter, RIFFA_INFO_REG_OFF);
+
+	/* Ready to go, "enable" IRQ. */
+	adapter->running = 1;
+
+	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
+
+	return (0);
+
+error:
+	sume_detach(dev);
+
+	return (error);
+}
+
+/*
+ * Release the per-channel state of one direction: unload the DMA map, free
+ * the bounce buffer, destroy the DMA tag and free the channel structure.
+ * Copes with channels that were only partially set up (the structures are
+ * allocated zeroed, so a step that never happened leaves its field 0/NULL).
+ * The array 'pp' itself is freed by the caller.  'adapter' is unused.
+ */
+static void
+sume_remove_riffa_buffer(const struct sume_adapter *adapter,
+    struct riffa_chnl_dir **pp)
+{
+	int ch;
+
+	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
+		if (pp[ch] == NULL)
+			continue;
+
+		/* A non-zero bus address means the map was loaded. */
+		if (pp[ch]->buf_hw_addr != 0) {
+			bus_dmamap_unload(pp[ch]->ch_tag, pp[ch]->ch_map);
+			pp[ch]->buf_hw_addr = 0;
+		}
+
+		/* Free the memory even if loading it had failed. */
+		if (pp[ch]->buf_addr != NULL) {
+			bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
+			    pp[ch]->ch_map);
+			pp[ch]->buf_addr = NULL;
+		}
+
+		if (pp[ch]->ch_tag != NULL) {
+			bus_dma_tag_destroy(pp[ch]->ch_tag);
+			pp[ch]->ch_tag = NULL;
+		}
+
+		free(pp[ch], M_SUME);
+		pp[ch] = NULL;
+	}
+}
+
+/*
+ * Free the channel state of both directions (send first, then receive) and
+ * clear the adapter's pointers.  Directions that were never allocated are
+ * skipped.
+ */
+static void
+sume_remove_riffa_buffers(struct sume_adapter *adapter)
+{
+	struct riffa_chnl_dir ***dirs[2];
+	int i;
+
+	dirs[0] = &adapter->send;
+	dirs[1] = &adapter->recv;
+
+	for (i = 0; i < 2; i++) {
+		if (*dirs[i] == NULL)
+			continue;
+
+		sume_remove_riffa_buffer(adapter, *dirs[i]);
+		free(*dirs[i], M_SUME);
+		*dirs[i] = NULL;
+	}
+}
+
+/*
+ * Device detach; also used by sume_attach() to unwind a failed attach, so
+ * every step checks whether its resource exists.
+ *
+ * Teardown order: stop the timer and the statistics task, remove the
+ * interrupt handler (so nothing can touch the interfaces or DMA buffers any
+ * more), detach and free the interfaces, free the DMA buffers and finally
+ * release the bus resources and the lock.  Always returns 0.
+ */
+static int
+sume_detach(device_t dev)
+{
+	struct sume_adapter *adapter = device_get_softc(dev);
+	struct nf_priv *nf_priv;
+	struct ifnet *ifp;
+	int i;
+
+	KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
+	    "initialized"));
+	adapter->running = 0;
+
+	/* Drain the stats callout and task queue. */
+	callout_drain(&adapter->timer);
+
+	if (adapter->tq != NULL) {
+		taskqueue_drain(adapter->tq, &adapter->stat_task);
+		taskqueue_free(adapter->tq);
+		adapter->tq = NULL;
+	}
+
+	/*
+	 * Remove the interrupt handler before freeing anything it uses
+	 * (interfaces, DMA buffers).
+	 */
+	if (adapter->irq.tag != NULL) {
+		bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
+		adapter->irq.tag = NULL;
+	}
+
+	for (i = 0; i < SUME_NPORTS; i++) {
+		ifp = adapter->ifp[i];
+		if (ifp == NULL)
+			continue;
+
+		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+		nf_priv = ifp->if_softc;
+
+		if (ifp->if_flags & IFF_UP)
+			if_down(ifp);
+		ifp->if_flags &= ~IFF_UP;
+
+		/*
+		 * Detach from the stack first; only then drop the media
+		 * list and the unit number, which ioctls on a still
+		 * attached interface could otherwise reference.
+		 */
+		ether_ifdetach(ifp);
+		ifmedia_removeall(&nf_priv->media);
+		free_unr(unr, nf_priv->unit);
+		if_free(ifp);
+		adapter->ifp[i] = NULL;
+
+		free(nf_priv, M_SUME);
+	}
+
+	sume_remove_riffa_buffers(adapter);
+
+	if (adapter->irq.res != NULL) {
+		bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
+		    adapter->irq.res);
+		adapter->irq.res = NULL;
+	}
+
+	pci_release_msi(dev);
+
+	if (adapter->bar0_addr != NULL) {
+		bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
+		    adapter->bar0_addr);
+		adapter->bar0_addr = NULL;
+	}
+
+	mtx_destroy(&adapter->lock);
+
+	return (0);
+}
+
+/*
+ * Module event handler: create the unit number allocator on load and delete
+ * it on unload.  All other events are ignored.  Always returns 0.
+ */
+static int
+mod_event(module_t mod, int cmd, void *arg)
+{
+	if (cmd == MOD_LOAD)
+		unr = new_unrhdr(0, INT_MAX, NULL);
+	else if (cmd == MOD_UNLOAD)
+		delete_unrhdr(unr);
+
+	return (0);
+}
+static devclass_t sume_devclass; /* devclass passed to DRIVER_MODULE() */
+
+DRIVER_MODULE(sume, pci, sume_driver, sume_devclass, mod_event, 0); /* mod_event handles load/unload */
+MODULE_VERSION(sume, 1);
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 9b0bd33df49c..35f491d251fe 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -345,6 +345,7 @@ SUBDIR= \
${_sppp} \
ste \
stge \
+ ${_sume} \
${_superio} \
${_sym} \
${_syscons} \
@@ -719,6 +720,7 @@ _nvdimm= nvdimm
_pms= pms
_qlxge= qlxge
_qlxgb= qlxgb
+_sume= sume
_vmd= vmd
.if ${MK_SOURCELESS_UCODE} != "no"
_qlxgbe= qlxgbe
diff --git a/sys/modules/sume/Makefile b/sys/modules/sume/Makefile
new file mode 100644
index 000000000000..35f3abd6ca65
--- /dev/null
+++ b/sys/modules/sume/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/dev/sume
+
+KMOD= if_sume
+SRCS= if_sume.c
+SRCS+= device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>