/*-
 * Copyright (c) 2015
 *	Jonathan Looney. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_pcap.h>

#define	M_LEADINGSPACE_NOWRITE(m)					\
	((m)->m_data - M_START(m))

int tcp_pcap_aggressive_free = 1;
static int tcp_pcap_clusters_referenced_cur = 0;
static int tcp_pcap_clusters_referenced_max = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
    CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
    "Free saved packets when the memory system comes under pressure");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
    CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
    "Number of clusters currently referenced on TCP PCAP queues");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
    CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
    "Maximum number of clusters allowed to be referenced on TCP PCAP "
    "queues");

static int tcp_pcap_alloc_reuse_ext = 0;
static int tcp_pcap_alloc_reuse_mbuf = 0;
static int tcp_pcap_alloc_new_mbuf = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
    CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
    "Number of mbufs with external storage reused for the TCP PCAP "
    "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
    CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
    "Number of mbufs with internal storage reused for the TCP PCAP "
    "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
    CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
    "Number of new mbufs allocated for the TCP PCAP functionality");

VNET_DEFINE(int, tcp_pcap_packets) = 0;
#define	V_tcp_pcap_packets	VNET(tcp_pcap_packets)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
    "Default number of packets saved per direction per TCPCB");

/* Initialize the values. */
static void
tcp_pcap_max_set(void)
{

	tcp_pcap_clusters_referenced_max = nmbclusters / 4;
}

void
tcp_pcap_init(void)
{

	tcp_pcap_max_set();
	EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
	    NULL, EVENTHANDLER_PRI_ANY);
}
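/*
 * Illustrative sketch, not part of this file: the tunables and counters
 * defined above live under the net.inet.tcp sysctl tree, so (assuming a
 * hypothetical userland monitoring tool) the current cluster-reference
 * count could be read with the standard sysctlbyname(3) call:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int cur;
 *	size_t len = sizeof(cur);
 *
 *	if (sysctlbyname("net.inet.tcp.tcp_pcap_clusters_referenced_cur",
 *	    &cur, &len, NULL, 0) == 0)
 *		printf("%d clusters currently referenced\n", cur);
 */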
/*
 * If we are below the maximum allowed cluster references,
 * increment the reference count and return TRUE. Otherwise,
 * leave the reference count alone and return FALSE.
 */
static __inline bool
tcp_pcap_take_cluster_reference(void)
{
	if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
	    tcp_pcap_clusters_referenced_max) {
		atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
		return FALSE;
	}
	return TRUE;
}

/*
 * For all the external entries in m, apply the given adjustment.
 * This can be used to adjust the counter when an mbuf chain is
 * copied or freed.
 */
static __inline void
tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
{
	while (m) {
		if (m->m_flags & M_EXT)
			atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);

		m = m->m_next;
	}
}

/*
 * Free all mbufs in a chain, decrementing the reference count as
 * necessary.
 *
 * Functions in this file should use this instead of m_freem() when
 * they are freeing mbuf chains that may contain clusters that were
 * already included in tcp_pcap_clusters_referenced_cur.
 */
static void
tcp_pcap_m_freem(struct mbuf *mb)
{
	while (mb != NULL) {
		if (mb->m_flags & M_EXT)
			atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
			    1);
		mb = m_free(mb);
	}
}

/*
 * Copy data from m to n, where n cannot fit all the data we might
 * want from m.
 *
 * Prioritize data like this:
 * 1. TCP header
 * 2. IP header
 * 3. Data
 */
static void
tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
{
	struct mbuf *m_cur = m;
	int bytes_to_copy = 0, trailing_data, skip = 0, tcp_off;

	/* Below, we assume these will be non-NULL. */
	KASSERT(th, ("%s: called with th == NULL", __func__));
	KASSERT(m, ("%s: called with m == NULL", __func__));
	KASSERT(n, ("%s: called with n == NULL", __func__));

	/* We assume this initialization occurred elsewhere. */
	KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
	    __func__, n->m_len));
	KASSERT(n->m_data == M_START(n),
	    ("%s: called with n->m_data != M_START(n)", __func__));

	/*
	 * Calculate the size of the TCP header. We use this often
	 * enough that it is worth just calculating at the start.
	 */
	tcp_off = th->th_off << 2;

	/* Trim off leading empty mbufs. */
	while (m && m->m_len == 0)
		m = m->m_next;

	if (m) {
		m_cur = m;
	} else {
		/*
		 * No data? Highly unusual. We would expect to at
		 * least see a TCP header in the mbuf.
		 * As we have a pointer to the TCP header, I guess
		 * we should just copy that. (???)
		 */
fallback:
		bytes_to_copy = tcp_off;
		if (bytes_to_copy > M_SIZE(n))
			bytes_to_copy = M_SIZE(n);
		bcopy(th, n->m_data, bytes_to_copy);
		n->m_len = bytes_to_copy;
		return;
	}

	/*
	 * Find TCP header. Record the total number of bytes up to,
	 * and including, the TCP header.
	 */
	while (m_cur) {
		if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
		    (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
			break;
		bytes_to_copy += m_cur->m_len;
		m_cur = m_cur->m_next;
	}
	if (m_cur)
		bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
	else
		goto fallback;
	bytes_to_copy += tcp_off;

	/*
	 * If we already want to copy more bytes than we can hold
	 * in the destination mbuf, skip leading bytes and copy
	 * what we can.
	 *
	 * Otherwise, consider trailing data.
	 */
	if (bytes_to_copy > M_SIZE(n)) {
		skip = bytes_to_copy - M_SIZE(n);
		bytes_to_copy = M_SIZE(n);
	} else {
		/*
		 * Determine how much trailing data is in the chain.
		 * We start with the length of this mbuf (the one
		 * containing th) and subtract the size of the TCP
		 * header (tcp_off) and the size of the data prior
		 * to th (th - m_cur->m_data).
		 *
		 * This *should not* be negative, as the TCP code
		 * should put the whole TCP header in a single
		 * mbuf. But, it isn't a problem if it is. We will
		 * simply work off our negative balance as we look
		 * at subsequent mbufs.
		 */
		trailing_data = m_cur->m_len - tcp_off;
		trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
		m_cur = m_cur->m_next;
		while (m_cur) {
			trailing_data += m_cur->m_len;
			m_cur = m_cur->m_next;
		}
		if ((bytes_to_copy + trailing_data) > M_SIZE(n))
			bytes_to_copy = M_SIZE(n);
		else
			bytes_to_copy += trailing_data;
	}

	m_copydata(m, skip, bytes_to_copy, n->m_data);
	n->m_len = bytes_to_copy;
}

void
tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
{
	struct mbuf *n = NULL, *mhead;

	KASSERT(th, ("%s: called with th == NULL", __func__));
	KASSERT(m, ("%s: called with m == NULL", __func__));
	KASSERT(queue, ("%s: called with queue == NULL", __func__));

	/* We only care about data packets. */
	while (m && m->m_type != MT_DATA)
		m = m->m_next;

	/* We only need to do something if we still have an mbuf. */
	if (!m)
		return;

	/* If we are not saving mbufs, return now. */
	if (queue->mq_maxlen == 0)
		return;

	/*
	 * Check to see if we will need to recycle mbufs.
	 *
	 * If we need to get rid of mbufs to stay below
	 * our packet count, try to reuse the mbuf. Once
	 * we already have a new mbuf (n), then we can
	 * simply free subsequent mbufs.
	 *
	 * Note that most of the logic in here is to deal
	 * with the reuse. If we are fine with constant
	 * mbuf allocs/deallocs, we could ditch this logic.
	 * But, it only seems to make sense to reuse
	 * mbufs we already have.
	 */
	while (mbufq_full(queue)) {
		mhead = mbufq_dequeue(queue);

		if (n) {
			tcp_pcap_m_freem(mhead);
		} else {
			/*
			 * If this held an external cluster, try to
			 * detach the cluster. But, if we held the
			 * last reference, go through the normal
			 * free-ing process.
			 */
			if (mhead->m_flags & M_EXTPG) {
				/* Don't mess around with these. */
				tcp_pcap_m_freem(mhead);
				continue;
			} else if (mhead->m_flags & M_EXT) {
				switch (mhead->m_ext.ext_type) {
				case EXT_SFBUF:
					/* Don't mess around with these. */
					tcp_pcap_m_freem(mhead);
					continue;
				default:
					if (atomic_fetchadd_int(
					    mhead->m_ext.ext_cnt, -1) == 1) {
						/*
						 * We held the last reference
						 * on this cluster. Restore
						 * the reference count and put
						 * it back in the pool.
						 */
						*(mhead->m_ext.ext_cnt) = 1;
						tcp_pcap_m_freem(mhead);
						continue;
					}
					/*
					 * We were able to cleanly free the
					 * reference.
					 */
					atomic_subtract_int(
					    &tcp_pcap_clusters_referenced_cur,
					    1);
					tcp_pcap_alloc_reuse_ext++;
					break;
				}
			} else {
				tcp_pcap_alloc_reuse_mbuf++;
			}

			n = mhead;
			tcp_pcap_m_freem(n->m_next);
			m_init(n, M_NOWAIT, MT_DATA, 0);
		}
	}

	/* Check to see if we need to get a new mbuf. */
	if (!n) {
		if (!(n = m_get(M_NOWAIT, MT_DATA)))
			return;
		tcp_pcap_alloc_new_mbuf++;
	}

	/*
	 * What are we dealing with? If a cluster, attach it. Otherwise,
	 * try to copy the data from the beginning of the mbuf to the
	 * end of data. (There may be data between the start of the data
	 * area and the current data pointer. We want to get this, because
	 * it may contain header information that is useful.)
	 * In cases where that isn't possible, settle for what we can
	 * get.
	 */
	if ((m->m_flags & (M_EXT|M_EXTPG)) &&
	    tcp_pcap_take_cluster_reference()) {
		n->m_data = m->m_data;
		n->m_len = m->m_len;
		mb_dupcl(n, m);
	} else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
		/*
		 * At this point, n is guaranteed to be a normal mbuf
		 * with no cluster and no packet header. Because the
		 * logic in this code block requires this, the assert
		 * is here to catch any instances where someone
		 * changes the logic to invalidate that assumption.
		 */
		KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
		    ("%s: Unexpected flags (%#x) for mbuf",
		    __func__, n->m_flags));
		n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
		n->m_len = m->m_len;
		if (m->m_flags & M_EXTPG)
			m_copydata(m, 0, m->m_len, n->m_data);
		else
			bcopy(M_START(m), n->m_dat,
			    m->m_len + M_LEADINGSPACE_NOWRITE(m));
	} else {
		/*
		 * This is the case where we need to "settle for what
		 * we can get". The most probable way to reach this
		 * code path is that we've already taken references
		 * to the maximum number of mbuf clusters we can, and
		 * the data is too long to fit in an mbuf's internal
		 * storage. Try for a "best fit".
		 */
		tcp_pcap_copy_bestfit(th, m, n);

		/* Don't try to get additional data. */
		goto add_to_queue;
	}

	if (m->m_next) {
		n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
		tcp_pcap_adj_cluster_reference(n->m_next, 1);
	}

add_to_queue:
	/* Add the new mbuf to the list. */
	if (mbufq_enqueue(queue, n)) {
		/* This shouldn't happen. If INVARIANTS is defined, panic. */
		KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
		tcp_pcap_m_freem(n);
	}
}

void
tcp_pcap_drain(struct mbufq *queue)
{
	struct mbuf *m;

	while ((m = mbufq_dequeue(queue)))
		tcp_pcap_m_freem(m);
}

void
tcp_pcap_tcpcb_init(struct tcpcb *tp)
{
	mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
	mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
}

void
tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
{
	queue->mq_maxlen = newval;
	while (queue->mq_len > queue->mq_maxlen)
		tcp_pcap_m_freem(mbufq_dequeue(queue));
}

int
tcp_pcap_get_sock_max(struct mbufq *queue)
{
	return queue->mq_maxlen;
}
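
/*
 * Illustrative sketch, not part of this file: tcp_pcap_set_sock_max() and
 * tcp_pcap_get_sock_max() are intended to be reached through the TCP
 * socket-option path (the TCP_PCAP_OUT/TCP_PCAP_IN options from
 * <netinet/tcp.h>, on kernels built with "options TCPPCAP"). Assuming that
 * wiring, a userland process could ask for up to 20 transmitted packets to
 * be retained on its connected socket s roughly like this:
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <stdio.h>
 *
 *	int depth = 20;
 *
 *	if (setsockopt(s, IPPROTO_TCP, TCP_PCAP_OUT,
 *	    &depth, sizeof(depth)) != 0)
 *		perror("TCP_PCAP_OUT");
 */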