aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet/tcp_lro.h
blob: 3eefd4f0537c957d3cba1bd14d7ae575c8fae979 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2006, Myricom Inc.
 * Copyright (c) 2008, Intel Corporation.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _TCP_LRO_H_
#define _TCP_LRO_H_

#include <sys/time.h>

#ifndef TCP_LRO_ENTRIES
/* Define default number of LRO entries per RX queue */
#define	TCP_LRO_ENTRIES	8
#endif

/*
 * Flags for ACK entry for compression
 * the bottom 8 bits has the th_flags.
 * LRO itself adds only the TSTMP flags
 * to indicate if either of the types
 * of timestamps are filled and the
 * HAS_TSTMP option to indicate if the
 * TCP timestamp option is valid.
 *
 * The other 5 flag bits are for processing
 * by a stack.
 *
 */
#define TSTMP_LRO		0x0100
#define TSTMP_HDWR		0x0200
#define HAS_TSTMP		0x0400
/*
 * Default number of interrupts on the same cpu in a row
 * that will cause us to declare a "affinity cpu".
 */
#define TCP_LRO_CPU_DECLARATION_THRESH 50

struct inpcb;

union lro_address {
	u_long raw[1];
	struct {
		uint8_t lro_type;	/* internal */
#define	LRO_TYPE_NONE     0
#define	LRO_TYPE_IPV4_TCP 1
#define	LRO_TYPE_IPV6_TCP 2
#define	LRO_TYPE_IPV4_UDP 3
#define	LRO_TYPE_IPV6_UDP 4
		uint8_t lro_flags;
#define	LRO_FLAG_DECRYPTED 1
		uint16_t vlan_id;	/* VLAN identifier */
		uint16_t s_port;	/* source TCP/UDP port */
		uint16_t d_port;	/* destination TCP/UDP port */
		uint32_t vxlan_vni;	/* VXLAN virtual network identifier */
		union {
#ifdef INET
			struct in_addr v4;
#endif
#ifdef INET6
			struct in6_addr v6;
#endif
		} s_addr;	/* source IPv4/IPv6 address */
		union {
#ifdef INET
			struct in_addr v4;
#endif
#ifdef INET6
			struct in6_addr v6;
#endif
		} d_addr;	/* destination IPv4/IPv6 address */
	};
} __aligned(sizeof(u_long));

#define	LRO_RAW_ADDRESS_MAX \
    (sizeof(union lro_address) / sizeof(u_long))

/* Optimize address comparison by comparing one unsigned long at a time: */

static inline bool
lro_address_compare(const union lro_address *pa, const union lro_address *pb)
{
	if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) {
		return (true);
	} else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) {
		if (pa->raw[i] != pb->raw[i])
			return (false);
	}
	return (true);
}

struct lro_parser {
	union lro_address data;
	union {
		uint8_t *l3;
		struct ip *ip4;
		struct ip6_hdr *ip6;
	};
	union {
		uint8_t *l4;
		struct tcphdr *tcp;
		struct udphdr *udp;
	};
	uint16_t total_hdr_len;
};

/* This structure is zeroed frequently, try to keep it small. */
struct lro_entry {
	LIST_ENTRY(lro_entry)	next;
	LIST_ENTRY(lro_entry)	hash_next;
	struct mbuf		*m_head;
	struct mbuf		*m_tail;
	struct mbuf		*m_last_mbuf;
	struct lro_parser	outer;
	struct lro_parser	inner;
	uint32_t		next_seq;	/* tcp_seq */
	uint32_t		ack_seq;	/* tcp_seq */
	uint32_t		tsval;
	uint32_t		tsecr;
	uint16_t		compressed;
	uint16_t		uncompressed;
	uint16_t		window;
	uint16_t		timestamp;	/* flag, not a TCP hdr field. */
	struct bintime		alloc_time;	/* time when entry was allocated */
};

LIST_HEAD(lro_head, lro_entry);

struct lro_mbuf_sort {
	uint64_t seq;
	struct mbuf *mb;
};

/* NB: This is part of driver structs. */
struct lro_ctrl {
	struct ifnet	*ifp;
	struct lro_mbuf_sort *lro_mbuf_data;
	struct bintime	lro_last_queue_time;	/* last time data was queued */
	uint64_t	lro_queued;
	uint64_t	lro_flushed;
	uint64_t	lro_bad_csum;
	unsigned	lro_cnt;
	unsigned	lro_mbuf_count;
	unsigned	lro_mbuf_max;
	unsigned short	lro_ackcnt_lim;		/* max # of aggregated ACKs */
	unsigned short	lro_cpu;		/* Guess at the cpu we have affinity too */
	unsigned 	lro_length_lim;		/* max len of aggregated data */
	u_long		lro_hashsz;
	uint32_t	lro_last_cpu;
	uint32_t 	lro_cnt_of_same_cpu;
	struct lro_head	*lro_hash;
	struct lro_head	lro_active;
	struct lro_head	lro_free;
	uint8_t		lro_cpu_is_set;		/* Flag to say its ok to set the CPU on the inp */
};

struct tcp_ackent {
	uint64_t timestamp;	/* hardware or sofware timestamp, valid if TSTMP_LRO or TSTMP_HDRW set */
	uint32_t seq;		/* th_seq value */
	uint32_t ack;		/* th_ack value */
	uint32_t ts_value;	/* If ts option value, valid if HAS_TSTMP is set */
	uint32_t ts_echo;	/* If ts option echo, valid if HAS_TSTMP is set */
	uint16_t win;		/* TCP window */
	uint16_t flags;		/* Flags to say if TS is present and type of timestamp and th_flags */
	uint8_t  codepoint;	/* IP level codepoint including ECN bits */
	uint8_t  ack_val_set;	/* Classification of ack used by the stack */
	uint8_t  pad[2];	/* To 32 byte boundary */
};

/* We use two M_PROTO on the mbuf */
#define M_ACKCMP	M_PROTO4   /* Indicates LRO is sending in a  Ack-compression mbuf */
#define M_LRO_EHDRSTRP	M_PROTO6   /* Indicates that LRO has stripped the etherenet header */

#define	TCP_LRO_LENGTH_MAX	(65535 - 255)	/* safe value with room for outer headers */
#define	TCP_LRO_ACKCNT_MAX	65535		/* unlimited */

int tcp_lro_init(struct lro_ctrl *);
int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
void tcp_lro_free(struct lro_ctrl *);
void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
void tcp_lro_flush_all(struct lro_ctrl *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
void tcp_lro_reg_mbufq(void);
void tcp_lro_dereg_mbufq(void);

#define	TCP_LRO_NO_ENTRIES	-2
#define	TCP_LRO_CANNOT		-1
#define	TCP_LRO_NOT_SUPPORTED	1

#endif /* _TCP_LRO_H_ */