aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKristof Provost <kp@FreeBSD.org>2025-02-03 21:39:35 +0000
committerKristof Provost <kp@FreeBSD.org>2025-02-04 20:56:06 +0000
commitfffedd81a4bf86b1f77fc4ba0d170e7ef73d552c (patch)
tree4a7b16941274e2757aa845a21b5f98f970326b84
parent1f4c3887e3fad411914ddc729fce175d5613e2b4 (diff)
pf: send ICMP destination unreachable fragmentation needed when appropriate
Just like we do for IPv6, generate an ICMP fragmentation needed packet if we're going to need fragmentation for IPv4 as well (i.e. DF is set). Do so before full processing, so we generate it with pre-NAT addresses, just as we do for IPv6. Sponsored by: Rubicon Communications, LLC ("Netgate") Differential Revision: https://reviews.freebsd.org/D48805
-rw-r--r--sys/net/pfvar.h1
-rw-r--r--sys/netpfil/pf/pf.c41
-rw-r--r--tests/sys/netpfil/pf/icmp.py48
3 files changed, 75 insertions, 15 deletions
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index e50fbc96a8ba..6f10a55b64a8 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1625,6 +1625,7 @@ struct pf_pdesc {
struct pf_rule_actions act;
u_int32_t off; /* protocol header offset */
+ bool df; /* IPv4 Don't fragment flag. */
u_int32_t hdrlen; /* protocol header length */
u_int32_t p_len; /* total length of protocol payload */
u_int32_t extoff; /* extentsion header offset */
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 0ebc813756f1..d78978a75317 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -9990,6 +9990,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
pd->ttl = h->ip_ttl;
pd->tot_len = ntohs(h->ip_len);
pd->act.rtableid = -1;
+ pd->df = h->ip_off & htons(IP_DF);
if (h->ip_hl > 5) /* has options */
pd->badopts++;
@@ -10317,21 +10318,6 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0
return (PF_PASS);
}
-#ifdef INET6
- /*
- * If we end up changing IP addresses (e.g. binat) the stack may get
- * confused and fail to send the icmp6 packet too big error. Just send
- * it here, before we do any NAT.
- */
- if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD &&
- IN6_LINKMTU(ifp) < pf_max_frag_size(*m0)) {
- PF_RULES_RUNLOCK();
- icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, IN6_LINKMTU(ifp));
- *m0 = NULL;
- return (PF_DROP);
- }
-#endif
-
if (__predict_false(! M_WRITABLE(*m0))) {
*m0 = m_unshare(*m0, M_NOWAIT);
if (*m0 == NULL) {
@@ -10380,6 +10366,31 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0
goto done;
}
+#ifdef INET
+ if (af == AF_INET && dir == PF_OUT && pflags & PFIL_FWD &&
+ pd.df && (*m0)->m_pkthdr.len > ifp->if_mtu) {
+ PF_RULES_RUNLOCK();
+ icmp_error(*m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+ 0, ifp->if_mtu);
+ *m0 = NULL;
+ return (PF_DROP);
+ }
+#endif
+#ifdef INET6
+ /*
+ * If we end up changing IP addresses (e.g. binat) the stack may get
+ * confused and fail to send the icmp6 packet too big error. Just send
+ * it here, before we do any NAT.
+ */
+ if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD &&
+ IN6_LINKMTU(ifp) < pf_max_frag_size(*m0)) {
+ PF_RULES_RUNLOCK();
+ icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, IN6_LINKMTU(ifp));
+ *m0 = NULL;
+ return (PF_DROP);
+ }
+#endif
+
if (__predict_false(ip_divert_ptr != NULL) &&
((mtag = m_tag_locate(pd.m, MTAG_PF_DIVERT, 0, NULL)) != NULL)) {
struct pf_divert_mtag *dt = (struct pf_divert_mtag *)(mtag+1);
diff --git a/tests/sys/netpfil/pf/icmp.py b/tests/sys/netpfil/pf/icmp.py
index cb9de2bf0f36..e54f9f20a058 100644
--- a/tests/sys/netpfil/pf/icmp.py
+++ b/tests/sys/netpfil/pf/icmp.py
@@ -48,6 +48,7 @@ class TestICMP(VnetTestTemplate):
def vnet2_handler(self, vnet):
ifname = vnet.iface_alias_map["if1"].name
+ if2name = vnet.iface_alias_map["if2"].name
ToolsHelper.print_output("/sbin/pfctl -e")
ToolsHelper.pf_rules([
@@ -59,6 +60,8 @@ class TestICMP(VnetTestTemplate):
ToolsHelper.print_output("/sbin/sysctl net.inet.ip.forwarding=1")
ToolsHelper.print_output("/sbin/pfctl -x loud")
+ ToolsHelper.print_output("/sbin/ifconfig %s mtu 1492" % if2name)
+
def vnet3_handler(self, vnet):
# Import in the correct vnet, so at to not confuse Scapy
import scapy.all as sp
@@ -66,6 +69,7 @@ class TestICMP(VnetTestTemplate):
ifname = vnet.iface_alias_map["if2"].name
ToolsHelper.print_output("/sbin/route add default 198.51.100.1")
ToolsHelper.print_output("/sbin/ifconfig %s inet alias 198.51.100.3/24" % ifname)
+ ToolsHelper.print_output("/sbin/ifconfig %s mtu 1492" % ifname)
def checkfn(packet):
icmp = packet.getlayer(sp.ICMP)
@@ -124,3 +128,47 @@ class TestICMP(VnetTestTemplate):
# We expect the timeout here. It means we didn't get the destination
# unreachable packet in vnet3
pass
+
+ def check_icmp_echo(self, sp, payload_size):
+ packet = sp.IP(dst="198.51.100.2", flags="DF") \
+ / sp.ICMP(type='echo-request') \
+ / sp.raw(bytes.fromhex('f0') * payload_size)
+
+ p = sp.sr1(packet, iface=self.vnet.iface_alias_map["if1"].name,
+ timeout=3)
+ p.show()
+
+ ip = p.getlayer(sp.IP)
+ icmp = p.getlayer(sp.ICMP)
+ assert ip
+ assert icmp
+
+ if payload_size > 1464:
+ # Expect ICMP destination unreachable, fragmentation needed
+ assert ip.src == "192.0.2.1"
+ assert ip.dst == "192.0.2.2"
+ assert icmp.type == 3 # dest-unreach
+ assert icmp.code == 4
+ assert icmp.nexthopmtu == 1492
+ else:
+ # Expect echo reply
+ assert ip.src == "198.51.100.2"
+ assert ip.dst == "192.0.2.2"
+ assert icmp.type == 0 # "echo-reply"
+ assert icmp.code == 0
+
+ return
+
+ @pytest.mark.require_user("root")
+ def test_fragmentation_needed(self):  # DF echo requests at, below and above the 1464-byte payload limit
+ ToolsHelper.print_output("/sbin/route add default 192.0.2.1")  # default route via 192.0.2.1
+
+ ToolsHelper.print_output("/sbin/ping -c 1 198.51.100.2")  # single plain ping first
+ ToolsHelper.print_output("/sbin/ping -c 1 -D -s 1472 198.51.100.2")  # DF set (-D), 1472-byte payload (-s)
+
+ # Import in the correct vnet, so as to not confuse Scapy
+ import scapy.all as sp
+
+ self.check_icmp_echo(sp, 128)  # small payload: echo reply expected
+ self.check_icmp_echo(sp, 1464)  # largest payload still expected to get an echo reply
+ self.check_icmp_echo(sp, 1468)  # above 1464: fragmentation-needed error expected