diff options
28 files changed, 545 insertions, 488 deletions
diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S index 130130b64541..877e377f892d 100644 --- a/sys/amd64/vmm/intel/vmx_support.S +++ b/sys/amd64/vmm/intel/vmx_support.S @@ -171,13 +171,11 @@ do_launch: */ movq %rsp, %rdi /* point %rdi back to 'vmxctx' */ movl $VMX_VMLAUNCH_ERROR, %eax - jmp decode_inst_error - + /* FALLTHROUGH */ decode_inst_error: movl $VM_FAIL_VALID, %r11d - jz inst_error - movl $VM_FAIL_INVALID, %r11d -inst_error: + movl $VM_FAIL_INVALID, %esi + cmovnzl %esi, %r11d movl %r11d, VMXCTX_INST_FAIL_STATUS(%rdi) /* diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h index 5851ac08f083..b9161c586d07 100644 --- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h +++ b/sys/compat/linuxkpi/common/include/linux/ieee80211.h @@ -147,9 +147,9 @@ enum ieee80211_vht_max_ampdu_len_exp { enum wlan_ht_cap_sm_ps { WLAN_HT_CAP_SM_PS_STATIC = 0, - WLAN_HT_CAP_SM_PS_DYNAMIC, - WLAN_HT_CAP_SM_PS_INVALID, - WLAN_HT_CAP_SM_PS_DISABLED, + WLAN_HT_CAP_SM_PS_DYNAMIC = 1, + WLAN_HT_CAP_SM_PS_INVALID = 2, + WLAN_HT_CAP_SM_PS_DISABLED = 3 }; #define WLAN_MAX_KEY_LEN 32 diff --git a/sys/compat/linuxkpi/common/include/net/cfg80211.h b/sys/compat/linuxkpi/common/include/net/cfg80211.h index 044f348ef08b..239b4a5ae7b8 100644 --- a/sys/compat/linuxkpi/common/include/net/cfg80211.h +++ b/sys/compat/linuxkpi/common/include/net/cfg80211.h @@ -57,8 +57,8 @@ extern int linuxkpi_debug_80211; #endif #define TODO(fmt, ...) if (linuxkpi_debug_80211 & D80211_TODO) \ printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) -#define IMPROVE(...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \ - printf("%s:%d: XXX LKPI80211 IMPROVE\n", __func__, __LINE__) +#define IMPROVE(fmt, ...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \ + printf("%s:%d: XXX LKPI80211 IMPROVE " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) enum rfkill_hard_block_reasons { RFKILL_HARD_BLOCK_NOT_OWNER = BIT(0), @@ -128,19 +128,24 @@ struct ieee80211_txrx_stypes { uint16_t rx; }; -/* XXX net80211 has an ieee80211_channel as well. */ +/* + * net80211 has an ieee80211_channel as well; we use the linuxkpi_ version + * interally in LinuxKPI and re-define ieee80211_channel for the drivers + * at the end of the file. + */ struct linuxkpi_ieee80211_channel { - /* TODO FIXME */ - uint32_t hw_value; /* ic_ieee */ - uint32_t center_freq; /* ic_freq */ - enum ieee80211_channel_flags flags; /* ic_flags */ + uint32_t center_freq; + uint16_t hw_value; + enum ieee80211_channel_flags flags; enum nl80211_band band; - int8_t max_power; /* ic_maxpower */ bool beacon_found; - int max_antenna_gain, max_reg_power; - int orig_flags; - int dfs_cac_ms, dfs_state; - int orig_mpwr; + enum nl80211_dfs_state dfs_state; + unsigned int dfs_cac_ms; + int max_antenna_gain; + int max_power; + int max_reg_power; + uint32_t orig_flags; + int orig_mpwr; }; struct cfg80211_bitrate_mask { @@ -1299,10 +1304,9 @@ reg_query_regdb_wmm(uint8_t *alpha2, uint32_t center_freq, struct ieee80211_reg_rule *rule) { - /* ETSI has special rules. FreeBSD regdb needs to learn about them. */ - TODO(); + IMPROVE("regdomain.xml needs to grow wmm information for at least ETSI"); - return (-ENXIO); + return (-ENODATA); } static __inline const u8 * diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h index 2ed595095f9e..19f7bcff29dc 100644 --- a/sys/compat/linuxkpi/common/include/net/mac80211.h +++ b/sys/compat/linuxkpi/common/include/net/mac80211.h @@ -1135,7 +1135,7 @@ extern const struct cfg80211_ops linuxkpi_mac80211cfgops; struct ieee80211_hw *linuxkpi_ieee80211_alloc_hw(size_t, const struct ieee80211_ops *); void linuxkpi_ieee80211_iffree(struct ieee80211_hw *); -void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *, char *); +void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *); int linuxkpi_ieee80211_ifattach(struct ieee80211_hw *); void linuxkpi_ieee80211_ifdetach(struct ieee80211_hw *); void linuxkpi_ieee80211_unregister_hw(struct ieee80211_hw *); @@ -1255,7 +1255,7 @@ SET_IEEE80211_DEV(struct ieee80211_hw *hw, struct device *dev) { set_wiphy_dev(hw->wiphy, dev); - linuxkpi_set_ieee80211_dev(hw, dev_name(dev)); + linuxkpi_set_ieee80211_dev(hw); IMPROVE(); } @@ -1741,12 +1741,15 @@ ieee80211_request_smps(struct ieee80211_vif *vif, u_int link_id, "SMPS_STATIC", "SMPS_DYNAMIC", "SMPS_AUTOMATIC", - "SMPS_NUM_MODES" }; - if (linuxkpi_debug_80211 & D80211_TODO) - printf("%s:%d: XXX LKPI80211 TODO smps %d %s\n", - __func__, __LINE__, smps, smps_mode_name[smps]); + if (vif->type != NL80211_IFTYPE_STATION) + return; + + if (smps >= nitems(smps_mode_name)) + panic("%s: unsupported smps value: %d\n", __func__, smps); + + IMPROVE("XXX LKPI80211 TODO smps %d %s\n", smps, smps_mode_name[smps]); } static __inline void diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index a7d6003843ba..500c1c1d52eb 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -274,48 +274,40 @@ lkpi_nl80211_sta_info_to_str(struct sbuf *s, const char *prefix, sbuf_printf(s, "\n"); } -static int -lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) +static void +lkpi_80211_dump_lvif_stas(struct lkpi_vif *lvif, struct sbuf *s) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct ieee80211vap *vap; - struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct station_info sinfo; - struct sbuf s; int error; - if (req->newptr) - return (EPERM); - - lvif = (struct lkpi_vif *)arg1; vif = LVIF_TO_VIF(lvif); vap = LVIF_TO_VAP(lvif); lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); - sbuf_new_for_sysctl(&s, NULL, 1024, req); - wiphy_lock(hw->wiphy); list_for_each_entry(lsta, &lvif->lsta_list, lsta_list) { sta = LSTA_TO_STA(lsta); - sbuf_putc(&s, '\n'); - sbuf_printf(&s, "lsta %p sta %p added_to_drv %d\n", lsta, sta, lsta->added_to_drv); + sbuf_putc(s, '\n'); + sbuf_printf(s, "lsta %p sta %p added_to_drv %d\n", lsta, sta, lsta->added_to_drv); memset(&sinfo, 0, sizeof(sinfo)); error = lkpi_80211_mo_sta_statistics(hw, vif, sta, &sinfo); if (error == EEXIST) /* Not added to driver. */ continue; if (error == ENOTSUPP) { - sbuf_printf(&s, " sta_statistics not supported\n"); + sbuf_printf(s, " sta_statistics not supported\n"); continue; } if (error != 0) { - sbuf_printf(&s, " sta_statistics failed: %d\n", error); + sbuf_printf(s, " sta_statistics failed: %d\n", error); continue; } @@ -325,51 +317,76 @@ lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) memcpy(&sinfo.rxrate, &lsta->sinfo.rxrate, sizeof(sinfo.rxrate)); sinfo.filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } + /* If no CHAIN_SIGNAL is reported, try to fill it in from the lsta sinfo. */ + if ((sinfo.filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) == 0 && + (lsta->sinfo.filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) != 0) { + sinfo.chains = lsta->sinfo.chains; + memcpy(sinfo.chain_signal, lsta->sinfo.chain_signal, + sizeof(sinfo.chain_signal)); + sinfo.filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); + } - lkpi_nl80211_sta_info_to_str(&s, " nl80211_sta_info (valid fields)", sinfo.filled); - sbuf_printf(&s, " connected_time %u inactive_time %u\n", + lkpi_nl80211_sta_info_to_str(s, " nl80211_sta_info (valid fields)", sinfo.filled); + sbuf_printf(s, " connected_time %u inactive_time %u\n", sinfo.connected_time, sinfo.inactive_time); - sbuf_printf(&s, " rx_bytes %ju rx_packets %u rx_dropped_misc %u\n", + sbuf_printf(s, " rx_bytes %ju rx_packets %u rx_dropped_misc %u\n", (uintmax_t)sinfo.rx_bytes, sinfo.rx_packets, sinfo.rx_dropped_misc); - sbuf_printf(&s, " rx_duration %ju rx_beacon %u rx_beacon_signal_avg %d\n", + sbuf_printf(s, " rx_duration %ju rx_beacon %u rx_beacon_signal_avg %d\n", (uintmax_t)sinfo.rx_duration, sinfo.rx_beacon, (int8_t)sinfo.rx_beacon_signal_avg); - sbuf_printf(&s, " tx_bytes %ju tx_packets %u tx_failed %u\n", + sbuf_printf(s, " tx_bytes %ju tx_packets %u tx_failed %u\n", (uintmax_t)sinfo.tx_bytes, sinfo.tx_packets, sinfo.tx_failed); - sbuf_printf(&s, " tx_duration %ju tx_retries %u\n", + sbuf_printf(s, " tx_duration %ju tx_retries %u\n", (uintmax_t)sinfo.tx_duration, sinfo.tx_retries); - sbuf_printf(&s, " signal %d signal_avg %d ack_signal %d avg_ack_signal %d\n", + sbuf_printf(s, " signal %d signal_avg %d ack_signal %d avg_ack_signal %d\n", sinfo.signal, sinfo.signal_avg, sinfo.ack_signal, sinfo.avg_ack_signal); - - sbuf_printf(&s, " generation %d assoc_req_ies_len %zu chains %d\n", + sbuf_printf(s, " generation %d assoc_req_ies_len %zu chains %#04x\n", sinfo.generation, sinfo.assoc_req_ies_len, sinfo.chains); - for (int i = 0; i < sinfo.chains && i < IEEE80211_MAX_CHAINS; i++) { - sbuf_printf(&s, " chain[%d] signal %d signal_avg %d\n", + for (int i = 0; i < nitems(sinfo.chain_signal) && i < IEEE80211_MAX_CHAINS; i++) { + if (!(sinfo.chains & BIT(i))) + continue; + sbuf_printf(s, " chain[%d] signal %d signal_avg %d\n", i, (int8_t)sinfo.chain_signal[i], (int8_t)sinfo.chain_signal_avg[i]); } /* assoc_req_ies, bss_param, sta_flags */ - sbuf_printf(&s, " rxrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", + sbuf_printf(s, " rxrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", sinfo.rxrate.flags, CFG80211_RATE_INFO_FLAGS_BITS, sinfo.rxrate.bw, lkpi_rate_info_bw_to_str(sinfo.rxrate.bw), sinfo.rxrate.legacy * 100, sinfo.rxrate.mcs, sinfo.rxrate.nss); - sbuf_printf(&s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", + sbuf_printf(s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", sinfo.rxrate.he_dcm, sinfo.rxrate.he_gi, sinfo.rxrate.he_ru_alloc, sinfo.rxrate.eht_gi); - sbuf_printf(&s, " txrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", + sbuf_printf(s, " txrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", sinfo.txrate.flags, CFG80211_RATE_INFO_FLAGS_BITS, sinfo.txrate.bw, lkpi_rate_info_bw_to_str(sinfo.txrate.bw), sinfo.txrate.legacy * 100, sinfo.txrate.mcs, sinfo.txrate.nss); - sbuf_printf(&s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", + sbuf_printf(s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", sinfo.txrate.he_dcm, sinfo.txrate.he_gi, sinfo.txrate.he_ru_alloc, sinfo.txrate.eht_gi); } wiphy_unlock(hw->wiphy); +} + +static int +lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) +{ + struct lkpi_vif *lvif; + struct sbuf s; + + if (req->newptr) + return (EPERM); + + lvif = (struct lkpi_vif *)arg1; + + sbuf_new_for_sysctl(&s, NULL, 1024, req); + + lkpi_80211_dump_lvif_stas(lvif, &s); sbuf_finish(&s); sbuf_delete(&s); @@ -3826,8 +3843,10 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], IMPROVE(); + wiphy_lock(hw->wiphy); error = lkpi_80211_mo_start(hw); if (error != 0) { + wiphy_unlock(hw->wiphy); ic_printf(ic, "%s: failed to start hw: %d\n", __func__, error); mtx_destroy(&lvif->mtx); free(lvif, M_80211_VAP); @@ -3837,11 +3856,13 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], error = lkpi_80211_mo_add_interface(hw, vif); if (error != 0) { IMPROVE(); /* XXX-BZ mo_stop()? */ + wiphy_unlock(hw->wiphy); ic_printf(ic, "%s: failed to add interface: %d\n", __func__, error); mtx_destroy(&lvif->mtx); free(lvif, M_80211_VAP); return (NULL); } + wiphy_unlock(hw->wiphy); LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_INSERT_TAIL(&lhw->lvif_head, lvif, lvif_entry); @@ -3873,10 +3894,12 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], /* Force MC init. */ lkpi_update_mcast_filter(ic, true); - IMPROVE(); - ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); + /* Now we have a valid vap->iv_ifp. Any checksum offloading goes below. */ + + IMPROVE(); + /* Override with LinuxKPI method so we can drive mac80211/cfg80211. */ lvif->iv_newstate = vap->iv_newstate; vap->iv_newstate = lkpi_iv_newstate; @@ -5571,6 +5594,12 @@ lkpi_ic_ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap return (-ENXIO); } + if (lsta->state != IEEE80211_STA_AUTHORIZED) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p state %d not AUTHORIZED\n", + __func__, lsta, ni, vap, sta, lsta->state); + return (-ENXIO); + } + params.sta = sta; params.action = IEEE80211_AMPDU_RX_START; params.buf_size = _IEEE80211_MASKSHIFT(le16toh(baparamset), IEEE80211_BAPS_BUFSIZ); @@ -5647,13 +5676,35 @@ lkpi_ic_ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); lsta = ni->ni_drv_data; + if (lsta == NULL) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, lsta is NULL\n", + __func__, lsta, ni, vap); + goto net80211_only; + } sta = LSTA_TO_STA(lsta); + if (!lsta->added_to_drv) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p not added to firmware\n", + __func__, lsta, ni, vap, sta); + goto net80211_only; + } + + if (lsta->state != IEEE80211_STA_AUTHORIZED) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p state %d not AUTHORIZED\n", + __func__, lsta, ni, vap, sta, lsta->state); + goto net80211_only; + } + IMPROVE_HT("This really should be passed from ht_recv_action_ba_delba."); for (tid = 0; tid < WME_NUM_TID; tid++) { if (&ni->ni_rx_ampdu[tid] == rap) break; } + if (tid == WME_NUM_TID) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p TID not found\n", + __func__, lsta, ni, vap, sta); + goto net80211_only; + } params.sta = sta; params.action = IEEE80211_AMPDU_RX_STOP; @@ -6014,17 +6065,30 @@ linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw) } void -linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw, char *name) +linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw) { struct lkpi_hw *lhw; struct ieee80211com *ic; + struct device *dev; lhw = HW_TO_LHW(hw); ic = lhw->ic; - /* Now set a proper name before ieee80211_ifattach(). */ + /* Save the backpointer from net80211 to LinuxKPI. */ ic->ic_softc = lhw; - ic->ic_name = name; + + /* + * Set a proper name before ieee80211_ifattach() if dev is set. + * ath1xk also unset the dev so we need to check. + */ + dev = wiphy_dev(hw->wiphy); + if (dev != NULL) { + ic->ic_name = dev_name(dev); + } else { + TODO("adjust arguments to still have the old dev or go through " + "the hoops of getting the bsddev from hw and detach; " + "or do in XXX; check ath1kx drivers"); + } /* XXX-BZ do we also need to set wiphy name? */ } @@ -6332,8 +6396,10 @@ linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw) hw->wiphy->max_scan_ie_len -= lhw->scan_ie_len; } - if (bootverbose) + if (bootverbose) { + ic_printf(ic, "netdev_features %b\n", hw->netdev_features, NETIF_F_BITS); ieee80211_announce(ic); + } return (0); err: @@ -6832,9 +6898,17 @@ lkpi_convert_rx_status(struct ieee80211_hw *hw, struct lkpi_sta *lsta, rx_stats->c_pktflags |= IEEE80211_RX_F_FAIL_FCSCRC; #endif + /* Fill in some sinfo bits to fill gaps not reported byt the driver. */ if (lsta != NULL) { memcpy(&lsta->sinfo.rxrate, &rxrate, sizeof(rxrate)); lsta->sinfo.filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); + + if (rx_status->chains != 0) { + lsta->sinfo.chains = rx_status->chains; + memcpy(lsta->sinfo.chain_signal, rx_status->chain_signal, + sizeof(lsta->sinfo.chain_signal)); + lsta->sinfo.filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); + } } } diff --git a/sys/compat/linuxkpi/common/src/linux_80211_macops.c b/sys/compat/linuxkpi/common/src/linux_80211_macops.c index 78b2120f2d8c..1046b753574f 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211_macops.c +++ b/sys/compat/linuxkpi/common/src/linux_80211_macops.c @@ -53,6 +53,8 @@ lkpi_80211_mo_start(struct ieee80211_hw *hw) struct lkpi_hw *lhw; int error; + lockdep_assert_wiphy(hw->wiphy); + lhw = HW_TO_LHW(hw); if (lhw->ops->start == NULL) { error = EOPNOTSUPP; diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 70a2364f1fc6..62e06926e268 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -25,6 +25,10 @@ UBMIRROR="https://cloud-images.ubuntu.com" # default nic model for vm's NIC="virtio" +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in almalinux8) OSNAME="AlmaLinux 8" @@ -61,6 +65,14 @@ case "$OS" in OSNAME="Debian 12" URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" ;; + debian13) + OSNAME="Debian 13" + # TODO: Overwrite OSv to debian13 for virt-install until it's added to osinfo + OSv="debian12" + URL="https://cloud.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2" + OPTS[0]="--boot" + OPTS[1]="uefi=on" + ;; fedora41) OSNAME="Fedora 41" OSv="fedora-unknown" @@ -242,7 +254,7 @@ sudo virt-install \ --network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \ --cloud-init user-data=/tmp/user-data \ --disk $DISK,bus=virtio,cache=none,format=raw,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} >/dev/null # Give the VMs hostnames so we don't have to refer to them with # hardcoded IP addresses. diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh index c41ecd09d52e..ee058b488088 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -41,7 +41,7 @@ function debian() { libelf-dev libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev \ libtool libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ - python3-cffi python3-dev python3-distlib python3-packaging \ + python3-cffi python3-dev python3-distlib python3-packaging libtirpc-dev \ python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev echo "##[endgroup]" diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh index 6bf10024a1a6..0adcad2a99bc 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh @@ -12,16 +12,26 @@ source /var/tmp/env.txt # wait for poweroff to succeed PID=$(pidof /usr/bin/qemu-system-x86_64) tail --pid=$PID -f /dev/null -sudo virsh undefine openzfs +sudo virsh undefine --nvram openzfs # cpu pinning CPUSET=("0,1" "2,3") +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in freebsd*) # FreeBSD needs only 6GiB RAM=6 ;; + debian13) + RAM=8 + # Boot Debian 13 with uefi=on and secureboot=off (ZFS Kernel Module not signed) + OPTS[0]="--boot" + OPTS[1]="firmware=efi,firmware.feature0.name=secure-boot,firmware.feature0.enabled=no" + ;; *) # Linux needs more memory, but can be optimized to share it via KSM RAM=8 @@ -79,7 +89,7 @@ EOF --network bridge=virbr0,model=$NIC,mac="52:54:00:83:79:0$i" \ --disk $DISK-system,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ --disk $DISK-tests,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} done # generate some memory stats diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index cda620313189..4ebb80af1f03 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -29,7 +29,7 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" @@ -63,8 +63,8 @@ jobs: strategy: fail-fast: false matrix: - # rhl: almalinux8, almalinux9, centos-stream9, fedora41 - # debian: debian11, debian12, ubuntu22, ubuntu24 + # rhl: almalinux8, almalinux9, centos-stream9, fedora4x + # debian: debian12, debian13, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 1a9c671feac6..42f65290e4e3 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,8 +1,8 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.3.99 -Release: 1 +Version: 2.4.0 +Release: rc1 Release-Tags: relext License: CDDL Author: OpenZFS diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index a5f23be2aaaf..adaa5cd10961 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -1586,9 +1586,8 @@ dump_spacemap(objset_t *os, space_map_t *sm) continue; } - uint8_t words; char entry_type; - uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; + uint64_t entry_off, entry_run, entry_vdev; if (sm_entry_is_single_word(word)) { entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? @@ -1596,35 +1595,43 @@ dump_spacemap(objset_t *os, space_map_t *sm) entry_off = (SM_OFFSET_DECODE(word) << mapshift) + sm->sm_start; entry_run = SM_RUN_DECODE(word) << mapshift; - words = 1; + + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run); } else { /* it is a two-word entry so we read another word */ ASSERT(sm_entry_is_double_word(word)); uint64_t extra_word; offset += sizeof (extra_word); + ASSERT3U(offset, <, space_map_length(sm)); VERIFY0(dmu_read(os, space_map_object(sm), offset, sizeof (extra_word), &extra_word, DMU_READ_PREFETCH)); - ASSERT3U(offset, <=, space_map_length(sm)); - entry_run = SM2_RUN_DECODE(word) << mapshift; entry_vdev = SM2_VDEV_DECODE(word); entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? 'A' : 'F'; entry_off = (SM2_OFFSET_DECODE(extra_word) << mapshift) + sm->sm_start; - words = 2; - } - (void) printf("\t [%6llu] %c range:" - " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", - (u_longlong_t)entry_id, - entry_type, (u_longlong_t)entry_off, - (u_longlong_t)(entry_off + entry_run), - (u_longlong_t)entry_run, - (u_longlong_t)entry_vdev, words); + if (zopt_metaslab_args == 0 || + zopt_metaslab[0] == entry_vdev) { + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx " + "vdev: %llu\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run, + (u_longlong_t)entry_vdev); + } + } if (entry_type == 'A') alloc += entry_run; @@ -1873,7 +1880,7 @@ dump_metaslabs(spa_t *spa) (void) printf("\nMetaslabs:\n"); - if (!dump_opt['d'] && zopt_metaslab_args > 0) { + if (zopt_metaslab_args > 0) { c = zopt_metaslab[0]; if (c >= children) diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in index 3226d604546c..2b0568938b25 100755 --- a/sys/contrib/openzfs/contrib/debian/rules.in +++ b/sys/contrib/openzfs/contrib/debian/rules.in @@ -93,7 +93,7 @@ override_dh_auto_install: @# Install the DKMS source. @# We only want the files needed to build the modules install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \ - '$(CURDIR)/scripts/dkms.postbuild' + '$(CURDIR)/scripts/dkms.postbuild' '$(CURDIR)/scripts/objtool-wrapper.in' $(foreach file,$(DKMSFILES),mv '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/$(file)' '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)' || exit 1;) @# Only ever build Linux modules @@ -108,8 +108,8 @@ override_dh_auto_install: @# - zfs.release$ @# * Takes care of spaces and tabs @# * Remove reference to ZFS_AC_PACKAGE - awk '/^AC_CONFIG_FILES\(\[/,/^\]\)/ {\ - if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))/) \ + awk '/^AC_CONFIG_FILES\(\[/,/\]\)/ {\ + if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))|scripts\/objtool-wrapper.*\]\)$$/) \ {next} } {print}' \ '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' | sed '/ZFS_AC_PACKAGE/d' > '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' @# Set "SUBDIRS = module include" for CONFIG_KERNEL and remove SUBDIRS for all other configs. diff --git a/sys/contrib/openzfs/include/sys/dmu_impl.h b/sys/contrib/openzfs/include/sys/dmu_impl.h index 21a8b16a3ee6..bae872bd1907 100644 --- a/sys/contrib/openzfs/include/sys/dmu_impl.h +++ b/sys/contrib/openzfs/include/sys/dmu_impl.h @@ -168,12 +168,10 @@ extern "C" { * dn_allocated_txg * dn_free_txg * dn_assigned_txg - * dn_dirty_txg + * dn_dirtycnt * dd_assigned_tx * dn_notxholds * dn_nodnholds - * dn_dirtyctx - * dn_dirtyctx_firstset * (dn_phys copy fields?) * (dn_phys contents?) * held from: diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h index 76218c8b09ca..8bd1db5b7165 100644 --- a/sys/contrib/openzfs/include/sys/dnode.h +++ b/sys/contrib/openzfs/include/sys/dnode.h @@ -141,12 +141,6 @@ struct dmu_buf_impl; struct objset; struct zio; -enum dnode_dirtycontext { - DN_UNDIRTIED, - DN_DIRTY_OPEN, - DN_DIRTY_SYNC -}; - /* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */ #define DNODE_FLAG_USED_BYTES (1 << 0) #define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1) @@ -340,11 +334,9 @@ struct dnode { uint64_t dn_allocated_txg; uint64_t dn_free_txg; uint64_t dn_assigned_txg; - uint64_t dn_dirty_txg; /* txg dnode was last dirtied */ + uint8_t dn_dirtycnt; kcondvar_t dn_notxholds; kcondvar_t dn_nodnholds; - enum dnode_dirtycontext dn_dirtyctx; - const void *dn_dirtyctx_firstset; /* dbg: contents meaningless */ /* protected by own devices */ zfs_refcount_t dn_tx_holds; @@ -440,7 +432,6 @@ void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting); int dnode_try_claim(objset_t *os, uint64_t object, int slots); boolean_t dnode_is_dirty(dnode_t *dn); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); -void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); @@ -468,9 +459,6 @@ void dnode_free_interior_slots(dnode_t *dn); void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type); -#define DNODE_IS_DIRTY(_dn) \ - ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa)) - #define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ (((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \ diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 7403f10d91b7..fccc4c5b5b94 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -2270,14 +2270,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) if (dn->dn_objset->os_dsl_dataset != NULL) rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG); #endif - /* - * We make this assert for private objects as well, but after we - * check if we're already dirty. They are allowed to re-dirty - * in syncing context. - */ - ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || - dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); mutex_enter(&db->db_mtx); /* @@ -2289,12 +2281,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_state == DB_CACHED || db->db_state == DB_FILL || db->db_state == DB_NOFILL); - mutex_enter(&dn->dn_mtx); - dnode_set_dirtyctx(dn, tx, db); - if (tx->tx_txg > dn->dn_dirty_txg) - dn->dn_dirty_txg = tx->tx_txg; - mutex_exit(&dn->dn_mtx); - if (db->db_blkid == DMU_SPILL_BLKID) dn->dn_have_spill = B_TRUE; @@ -2313,13 +2299,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (dr_next); } - /* - * Only valid if not already dirty. - */ - ASSERT(dn->dn_object == 0 || - dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); - ASSERT3U(dn->dn_nlevels, >, db->db_level); /* diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index a77f338bdfd3..8e6b569c2100 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -2037,6 +2037,8 @@ userquota_updates_task(void *arg) dn->dn_id_flags |= DN_ID_CHKED_BONUS; } dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); + ASSERT3U(dn->dn_dirtycnt, >, 0); + dn->dn_dirtycnt--; mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); @@ -2070,6 +2072,10 @@ dnode_rele_task(void *arg) dnode_t *dn; while ((dn = multilist_sublist_head(list)) != NULL) { + mutex_enter(&dn->dn_mtx); + ASSERT3U(dn->dn_dirtycnt, >, 0); + dn->dn_dirtycnt--; + mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); dnode_rele(dn, &os->os_synced_dnodes); } diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 963ff41232a3..6c150d31c669 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -173,9 +173,7 @@ dnode_cons(void *arg, void *unused, int kmflag) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; + dn->dn_dirtycnt = 0; dn->dn_bonus = NULL; dn->dn_have_spill = B_FALSE; dn->dn_zio = NULL; @@ -229,9 +227,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); - ASSERT0(dn->dn_dirty_txg); - ASSERT0(dn->dn_dirtyctx); - ASSERT0P(dn->dn_dirtyctx_firstset); + ASSERT0(dn->dn_dirtycnt); ASSERT0P(dn->dn_bonus); ASSERT(!dn->dn_have_spill); ASSERT0P(dn->dn_zio); @@ -692,10 +688,8 @@ dnode_destroy(dnode_t *dn) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; if (dn->dn_bonus != NULL) { mutex_enter(&dn->dn_bonus->db_mtx); dbuf_destroy(dn->dn_bonus); @@ -800,11 +794,9 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dn->dn_bonuslen = bonuslen; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; - dn->dn_dirtyctx = 0; dn->dn_free_txg = 0; - dn->dn_dirtyctx_firstset = NULL; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; dn->dn_allocated_txg = tx->tx_txg; dn->dn_id_flags = 0; @@ -955,9 +947,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; - ndn->dn_dirty_txg = odn->dn_dirty_txg; - ndn->dn_dirtyctx = odn->dn_dirtyctx; - ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; + ndn->dn_dirtycnt = odn->dn_dirtycnt; ASSERT0(zfs_refcount_count(&odn->dn_tx_holds)); zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ASSERT(avl_is_empty(&ndn->dn_dbufs)); @@ -1020,9 +1010,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; - odn->dn_dirty_txg = 0; - odn->dn_dirtyctx = 0; - odn->dn_dirtyctx_firstset = NULL; + odn->dn_dirtycnt = 0; odn->dn_have_spill = B_FALSE; odn->dn_zio = NULL; odn->dn_oldused = 0; @@ -1273,8 +1261,8 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) } else if (DN_SLOT_IS_PTR(dn)) { mutex_enter(&dn->dn_mtx); boolean_t can_free = (dn->dn_type == DMU_OT_NONE && - zfs_refcount_is_zero(&dn->dn_holds) && - !DNODE_IS_DIRTY(dn)); + dn->dn_dirtycnt == 0 && + zfs_refcount_is_zero(&dn->dn_holds)); mutex_exit(&dn->dn_mtx); if (!can_free) @@ -1757,17 +1745,23 @@ dnode_hold(objset_t *os, uint64_t object, const void *tag, dnode_t **dnp) * reference on the dnode. Returns FALSE if unable to add a * new reference. */ +static boolean_t +dnode_add_ref_locked(dnode_t *dn, const void *tag) +{ + ASSERT(MUTEX_HELD(&dn->dn_mtx)); + if (zfs_refcount_is_zero(&dn->dn_holds)) + return (FALSE); + VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + return (TRUE); +} + boolean_t dnode_add_ref(dnode_t *dn, const void *tag) { mutex_enter(&dn->dn_mtx); - if (zfs_refcount_is_zero(&dn->dn_holds)) { - mutex_exit(&dn->dn_mtx); - return (FALSE); - } - VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + boolean_t r = dnode_add_ref_locked(dn, tag); mutex_exit(&dn->dn_mtx); - return (TRUE); + return (r); } void @@ -1830,31 +1824,20 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) } /* - * Checks if the dnode itself is dirty, or is carrying any uncommitted records. - * It is important to check both conditions, as some operations (eg appending - * to a file) can dirty both as a single logical unit, but they are not synced - * out atomically, so checking one and not the other can result in an object - * appearing to be clean mid-way through a commit. + * Test if the dnode is dirty, or carrying uncommitted records. * - * Do not change this lightly! If you get it wrong, dmu_offset_next() can - * detect a hole where there is really data, leading to silent corruption. + * dn_dirtycnt is the number of txgs this dnode is dirty on. It's incremented + * in dnode_setdirty() the first time the dnode is dirtied on a txg, and + * decremented in either dnode_rele_task() or userquota_updates_task() when the + * txg is synced out. */ boolean_t dnode_is_dirty(dnode_t *dn) { mutex_enter(&dn->dn_mtx); - - for (int i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i]) || - !list_is_empty(&dn->dn_dirty_records[i])) { - mutex_exit(&dn->dn_mtx); - return (B_TRUE); - } - } - + boolean_t dirty = (dn->dn_dirtycnt != 0); mutex_exit(&dn->dn_mtx); - - return (B_FALSE); + return (dirty); } void @@ -1916,7 +1899,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) * dnode will hang around after we finish processing its * children. */ - VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); + mutex_enter(&dn->dn_mtx); + VERIFY(dnode_add_ref_locked(dn, (void *)(uintptr_t)tx->tx_txg)); + dn->dn_dirtycnt++; + ASSERT3U(dn->dn_dirtycnt, <=, 3); + mutex_exit(&dn->dn_mtx); (void) dbuf_dirty(dn->dn_dbuf, tx); @@ -2221,32 +2208,6 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, mutex_exit(&dn->dn_dbufs_mtx); } -void -dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag) -{ - /* - * Don't set dirtyctx to SYNC if we're just modifying this as we - * initialize the objset. - */ - if (dn->dn_dirtyctx == DN_UNDIRTIED) { - dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; - - if (ds != NULL) { - rrw_enter(&ds->ds_bp_rwlock, RW_READER, tag); - } - if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) { - if (dmu_tx_is_syncing(tx)) - dn->dn_dirtyctx = DN_DIRTY_SYNC; - else - dn->dn_dirtyctx = DN_DIRTY_OPEN; - dn->dn_dirtyctx_firstset = tag; - } - if (ds != NULL) { - rrw_exit(&ds->ds_bp_rwlock, tag); - } - } -} - static void dnode_partial_zero(dnode_t *dn, uint64_t off, uint64_t blkoff, uint64_t len, dmu_tx_t *tx) diff --git a/sys/dev/iwx/if_iwx.c b/sys/dev/iwx/if_iwx.c index d60ef1874a6c..1fe531d69933 100644 --- a/sys/dev/iwx/if_iwx.c +++ b/sys/dev/iwx/if_iwx.c @@ -5673,6 +5673,10 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if (rinfo == NULL) return EINVAL; + /* Offloaded sequence number assignment */ + /* Note: Should be done in firmware on all supported devices */ + + /* Radiotap */ if (ieee80211_radiotap_active_vap(vap)) { struct iwx_tx_radiotap_header *tap = &sc->sc_txtap; @@ -5685,6 +5689,7 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni) ieee80211_radiotap_tx(vap, m); } + /* Encrypt - CCMP via direct HW path, TKIP/WEP indirected openbsd-style for now */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_get_txkey(ni, m); if (k == NULL) { @@ -10467,6 +10472,8 @@ iwx_attach(device_t dev) IEEE80211_C_BGSCAN /* capable of bg scanning */ ; ic->ic_flags_ext = IEEE80211_FEXT_SCAN_OFFLOAD; + /* Enable seqno offload */ + ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD; ic->ic_txstream = 2; ic->ic_rxstream = 2; diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 12274bcceea1..72167b752e53 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -840,7 +840,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.3.99-571-FreeBSD_ga9410ccbd" +#define ZFS_META_ALIAS "zfs-2.4.0-rc1-FreeBSD_g00dfa094a" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -870,10 +870,10 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "571-FreeBSD_ga9410ccbd" +#define ZFS_META_RELEASE "zfs-2.4.0-rc1-FreeBSD_g00dfa094a" /* Define the project version. */ -#define ZFS_META_VERSION "2.3.99" +#define ZFS_META_VERSION "2.4.0" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 5c265cf5b08e..2b5d717da216 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.3.99-571-ga9410ccbd" +#define ZFS_META_GITREV "zfs-2.4.0-rc1-0-g00dfa094a" diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h index 141b13f9f740..3684fba52c5c 100644 --- a/sys/net80211/ieee80211_freebsd.h +++ b/sys/net80211/ieee80211_freebsd.h @@ -93,12 +93,22 @@ typedef struct { } while (0) #define IEEE80211_TX_LOCK_OBJ(_ic) (&(_ic)->ic_txlock.mtx) #define IEEE80211_TX_LOCK_DESTROY(_ic) mtx_destroy(IEEE80211_TX_LOCK_OBJ(_ic)) -#define IEEE80211_TX_LOCK(_ic) mtx_lock(IEEE80211_TX_LOCK_OBJ(_ic)) -#define IEEE80211_TX_UNLOCK(_ic) mtx_unlock(IEEE80211_TX_LOCK_OBJ(_ic)) -#define IEEE80211_TX_LOCK_ASSERT(_ic) \ - mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_OWNED) -#define IEEE80211_TX_UNLOCK_ASSERT(_ic) \ - mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_NOTOWNED) +#define IEEE80211_TX_LOCK(_ic) do { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_lock(IEEE80211_TX_LOCK_OBJ(_ic)); \ + } while (0); +#define IEEE80211_TX_UNLOCK(_ic) do { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_unlock(IEEE80211_TX_LOCK_OBJ(_ic)); \ + } while (0); +#define IEEE80211_TX_LOCK_ASSERT(_ic) do { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_OWNED); \ + } while (0) +#define IEEE80211_TX_UNLOCK_ASSERT(_ic) { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_NOTOWNED); \ + } while (0) /* * Stageq / ni_tx_superg lock diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index afe83ea0805c..57fe687adffe 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -974,7 +974,7 @@ ieee80211_send_setup( /* NB: zero out i_seq field (for s/w encryption etc) */ *(uint16_t *)&wh->i_seq[0] = 0; - } else + } else if (!IEEE80211_CONF_SEQNO_OFFLOAD(ni->ni_ic)) ieee80211_output_seqno_assign(ni, tid, m); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) @@ -1810,7 +1810,8 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, * and we don't need the TX lock held. */ if ((m->m_flags & M_AMPDU_MPDU) == 0) { - ieee80211_output_seqno_assign(ni, tid, m); + if (!IEEE80211_CONF_SEQNO_OFFLOAD(ic)) + ieee80211_output_seqno_assign(ni, tid, m); } else { /* * NB: don't assign a sequence # to potential @@ -1828,7 +1829,9 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, *(uint16_t *)wh->i_seq = 0; } } else { - ieee80211_output_seqno_assign(ni, IEEE80211_NONQOS_TID, m); + if (!IEEE80211_CONF_SEQNO_OFFLOAD(ic)) + ieee80211_output_seqno_assign(ni, IEEE80211_NONQOS_TID, + m); /* * XXX TODO: we shouldn't allow EAPOL, etc that would * be forced to be non-QoS traffic to be A-MSDU encapsulated. @@ -3856,6 +3859,8 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) * If the driver identifies it does its own TX seqno management then * we can skip this (and still not do the TX seqno.) */ + + /* TODO: IEEE80211_CONF_SEQNO_OFFLOAD() */ ieee80211_output_beacon_seqno_assign(ni, m); /* XXX faster to recalculate entirely or just changes? */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 6492495dc583..ec7102223c2d 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2562,299 +2562,272 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, hhook_run_tcp_est_in(tp, th, &to); #endif - if (SEQ_LEQ(th->th_ack, tp->snd_una)) { - maxseg = tcp_maxseg(tp); - if (no_data && - (tiwin == tp->snd_wnd || - (tp->t_flags & TF_SACK_PERMIT))) { + if (SEQ_LT(th->th_ack, tp->snd_una)) { + /* This is old ACK information, don't process it. */ + break; + } + if (th->th_ack == tp->snd_una) { + /* Check if this is a duplicate ACK. */ + if ((tp->t_flags & TF_SACK_PERMIT) && + V_tcp_do_newsack) { /* - * If this is the first time we've seen a - * FIN from the remote, this is not a - * duplicate and it needs to be processed - * normally. This happens during a - * simultaneous close. + * If SEG.ACK == SND.UNA, RFC 6675 requires a + * duplicate ACK to selectively acknowledge + * at least one byte, which was not selectively + * acknowledged before. */ - if ((thflags & TH_FIN) && - (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { - tp->t_dupacks = 0; + if (sack_changed == SACK_NOCHANGE) { break; } - TCPSTAT_INC(tcps_rcvdupack); - /* - * If we have outstanding data (other than - * a window probe), this is a completely - * duplicate ack (ie, window info didn't - * change and FIN isn't set), - * the ack is the biggest we've - * seen and we've seen exactly our rexmt - * threshold of them, assume a packet - * has been dropped and retransmit it. - * Kludge snd_nxt & the congestion - * window so we send only this one - * packet. - * - * We know we're losing at the current - * window size so do congestion avoidance - * (set ssthresh to half the current window - * and pull our congestion window back to - * the new ssthresh). - * - * Dup acks mean that packets have left the - * network (they're now cached at the receiver) - * so bump cwnd by the amount in the receiver - * to keep a constant cwnd packets in the - * network. - * - * When using TCP ECN, notify the peer that - * we reduced the cwnd. - */ + } else { /* - * Following 2 kinds of acks should not affect - * dupack counting: - * 1) Old acks - * 2) Acks with SACK but without any new SACK - * information in them. These could result from - * any anomaly in the network like a switch - * duplicating packets or a possible DoS attack. + * If SEG.ACK == SND.UNA, RFC 5681 requires a + * duplicate ACK to have no data on it and to + * not be a window update. */ - if (th->th_ack != tp->snd_una || - (tcp_is_sack_recovery(tp, &to) && - (sack_changed == SACK_NOCHANGE))) { + if (!no_data || tiwin != tp->snd_wnd) { break; - } else if (!tcp_timer_active(tp, TT_REXMT)) { - tp->t_dupacks = 0; - } else if (++tp->t_dupacks > tcprexmtthresh || - IN_FASTRECOVERY(tp->t_flags)) { - cc_ack_received(tp, th, nsegs, - CC_DUPACK); - if (V_tcp_do_prr && + } + } + /* + * If this is the first time we've seen a + * FIN from the remote, this is not a + * duplicate ACK and it needs to be processed + * normally. + * This happens during a simultaneous close. + */ + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + tp->t_dupacks = 0; + break; + } + /* Perform duplicate ACK processing. */ + TCPSTAT_INC(tcps_rcvdupack); + maxseg = tcp_maxseg(tp); + if (!tcp_timer_active(tp, TT_REXMT)) { + tp->t_dupacks = 0; + } else if (++tp->t_dupacks > tcprexmtthresh || + IN_FASTRECOVERY(tp->t_flags)) { + cc_ack_received(tp, th, nsegs, CC_DUPACK); + if (V_tcp_do_prr && + IN_FASTRECOVERY(tp->t_flags) && + (tp->t_flags & TF_SACK_PERMIT)) { + tcp_do_prr_ack(tp, th, &to, + sack_changed, &maxseg); + } else if (tcp_is_sack_recovery(tp, &to) && IN_FASTRECOVERY(tp->t_flags) && - (tp->t_flags & TF_SACK_PERMIT)) { - tcp_do_prr_ack(tp, th, &to, - sack_changed, &maxseg); - } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags) && - (tp->snd_nxt == tp->snd_max)) { - int awnd; + (tp->snd_nxt == tp->snd_max)) { + int awnd; - /* - * Compute the amount of data in flight first. - * We can inject new data into the pipe iff - * we have less than ssthresh - * worth of data in flight. - */ - awnd = tcp_compute_pipe(tp); - if (awnd < tp->snd_ssthresh) { - tp->snd_cwnd += imax(maxseg, - imin(2 * maxseg, - tp->sackhint.delivered_data)); - if (tp->snd_cwnd > tp->snd_ssthresh) - tp->snd_cwnd = tp->snd_ssthresh; - } - } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags) && - SEQ_LT(tp->snd_nxt, tp->snd_max)) { + /* + * Compute the amount of data in flight first. + * We can inject new data into the pipe iff + * we have less than ssthresh + * worth of data in flight. + */ + awnd = tcp_compute_pipe(tp); + if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += imax(maxseg, imin(2 * maxseg, tp->sackhint.delivered_data)); - } else { - tp->snd_cwnd += maxseg; + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; } - (void) tcp_output(tp); - goto drop; - } else if (tp->t_dupacks == tcprexmtthresh || - (tp->t_flags & TF_SACK_PERMIT && - V_tcp_do_newsack && - tp->sackhint.sacked_bytes > - (tcprexmtthresh - 1) * maxseg)) { + } else if (tcp_is_sack_recovery(tp, &to) && + IN_FASTRECOVERY(tp->t_flags) && + SEQ_LT(tp->snd_nxt, tp->snd_max)) { + tp->snd_cwnd += imax(maxseg, + imin(2 * maxseg, + tp->sackhint.delivered_data)); + } else { + tp->snd_cwnd += maxseg; + } + (void) tcp_output(tp); + goto drop; + } else if (tp->t_dupacks == tcprexmtthresh || + (tp->t_flags & TF_SACK_PERMIT && + V_tcp_do_newsack && + tp->sackhint.sacked_bytes > + (tcprexmtthresh - 1) * maxseg)) { enter_recovery: - /* - * Above is the RFC6675 trigger condition of - * more than (dupthresh-1)*maxseg sacked data. - * If the count of holes in the - * scoreboard is >= dupthresh, we could - * also enter loss recovery, but don't - * have that value readily available. - */ - tp->t_dupacks = tcprexmtthresh; - tcp_seq onxt = tp->snd_nxt; + /* + * Above is the RFC6675 trigger condition of + * more than (dupthresh-1)*maxseg sacked data. + * If the count of holes in the + * scoreboard is >= dupthresh, we could + * also enter loss recovery, but don't + * have that value readily available. + */ + tp->t_dupacks = tcprexmtthresh; + tcp_seq onxt = tp->snd_nxt; - /* - * If we're doing sack, check to - * see if we're already in sack - * recovery. If we're not doing sack, - * check to see if we're in newreno - * recovery. - */ - if (tcp_is_sack_recovery(tp, &to)) { - if (IN_FASTRECOVERY(tp->t_flags)) { - tp->t_dupacks = 0; - break; - } - } else { - if (SEQ_LEQ(th->th_ack, - tp->snd_recover)) { - tp->t_dupacks = 0; - break; - } + /* + * If we're doing sack, check to + * see if we're already in sack + * recovery. If we're not doing sack, + * check to see if we're in newreno + * recovery. + */ + if (tcp_is_sack_recovery(tp, &to)) { + if (IN_FASTRECOVERY(tp->t_flags)) { + tp->t_dupacks = 0; + break; } - /* Congestion signal before ack. */ - cc_cong_signal(tp, th, CC_NDUPACK); - cc_ack_received(tp, th, nsegs, - CC_DUPACK); - tcp_timer_activate(tp, TT_REXMT, 0); - tp->t_rtttime = 0; - if (V_tcp_do_prr) { - /* - * snd_ssthresh and snd_recover are - * already updated by cc_cong_signal. - */ - if (tcp_is_sack_recovery(tp, &to)) { - /* - * Include Limited Transmit - * segments here - */ - tp->sackhint.prr_delivered = - imin(tp->snd_max - th->th_ack, - (tp->snd_limited + 1) * maxseg); - } else { - tp->sackhint.prr_delivered = - maxseg; - } - tp->sackhint.recover_fs = max(1, - tp->snd_nxt - tp->snd_una); + } else { + if (SEQ_LEQ(th->th_ack, + tp->snd_recover)) { + tp->t_dupacks = 0; + break; } - tp->snd_limited = 0; + } + /* Congestion signal before ack. */ + cc_cong_signal(tp, th, CC_NDUPACK); + cc_ack_received(tp, th, nsegs, CC_DUPACK); + tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + if (V_tcp_do_prr) { + /* + * snd_ssthresh and snd_recover are + * already updated by cc_cong_signal. + */ if (tcp_is_sack_recovery(tp, &to)) { - TCPSTAT_INC(tcps_sack_recovery_episode); /* - * When entering LR after RTO due to - * Duplicate ACKs, retransmit existing - * holes from the scoreboard. + * Include Limited Transmit + * segments here */ - tcp_resend_sackholes(tp); - /* Avoid inflating cwnd in tcp_output */ - tp->snd_nxt = tp->snd_max; - tp->snd_cwnd = tcp_compute_pipe(tp) + + tp->sackhint.prr_delivered = + imin(tp->snd_max - th->th_ack, + (tp->snd_limited + 1) * maxseg); + } else { + tp->sackhint.prr_delivered = maxseg; - (void) tcp_output(tp); - /* Set cwnd to the expected flightsize */ - tp->snd_cwnd = tp->snd_ssthresh; - if (SEQ_GT(th->th_ack, tp->snd_una)) { - goto resume_partialack; - } - goto drop; } - tp->snd_nxt = th->th_ack; - tp->snd_cwnd = maxseg; - (void) tcp_output(tp); - KASSERT(tp->snd_limited <= 2, - ("%s: tp->snd_limited too big", - __func__)); - tp->snd_cwnd = tp->snd_ssthresh + - maxseg * - (tp->t_dupacks - tp->snd_limited); - if (SEQ_GT(onxt, tp->snd_nxt)) - tp->snd_nxt = onxt; - goto drop; - } else if (V_tcp_do_rfc3042) { - /* - * Process first and second duplicate - * ACKs. Each indicates a segment - * leaving the network, creating room - * for more. Make sure we can send a - * packet on reception of each duplicate - * ACK by increasing snd_cwnd by one - * segment. Restore the original - * snd_cwnd after packet transmission. - */ - cc_ack_received(tp, th, nsegs, CC_DUPACK); - uint32_t oldcwnd = tp->snd_cwnd; - tcp_seq oldsndmax = tp->snd_max; - u_int sent; - int avail; - - KASSERT(tp->t_dupacks == 1 || - tp->t_dupacks == 2, - ("%s: dupacks not 1 or 2", - __func__)); - if (tp->t_dupacks == 1) - tp->snd_limited = 0; - if ((tp->snd_nxt == tp->snd_max) && - (tp->t_rxtshift == 0)) - tp->snd_cwnd = - SEQ_SUB(tp->snd_nxt, - tp->snd_una) - - tcp_sack_adjust(tp); - tp->snd_cwnd += - (tp->t_dupacks - tp->snd_limited) * - maxseg - tcp_sack_adjust(tp); + tp->sackhint.recover_fs = max(1, + tp->snd_nxt - tp->snd_una); + } + tp->snd_limited = 0; + if (tcp_is_sack_recovery(tp, &to)) { + TCPSTAT_INC(tcps_sack_recovery_episode); /* - * Only call tcp_output when there - * is new data available to be sent - * or we need to send an ACK. + * When entering LR after RTO due to + * Duplicate ACKs, retransmit existing + * holes from the scoreboard. */ - SOCK_SENDBUF_LOCK(so); - avail = sbavail(&so->so_snd); - SOCK_SENDBUF_UNLOCK(so); - if (tp->t_flags & TF_ACKNOW || - (avail >= - SEQ_SUB(tp->snd_nxt, tp->snd_una))) { - (void) tcp_output(tp); - } - sent = SEQ_SUB(tp->snd_max, oldsndmax); - if (sent > maxseg) { - KASSERT((tp->t_dupacks == 2 && - tp->snd_limited == 0) || - (sent == maxseg + 1 && - tp->t_flags & TF_SENTFIN) || - (sent < 2 * maxseg && - tp->t_flags & TF_NODELAY), - ("%s: sent too much: %u>%u", - __func__, sent, maxseg)); - tp->snd_limited = 2; - } else if (sent > 0) { - ++tp->snd_limited; - } - tp->snd_cwnd = oldcwnd; + tcp_resend_sackholes(tp); + /* Avoid inflating cwnd in tcp_output */ + tp->snd_nxt = tp->snd_max; + tp->snd_cwnd = tcp_compute_pipe(tp) + + maxseg; + (void) tcp_output(tp); + /* Set cwnd to the expected flightsize */ + tp->snd_cwnd = tp->snd_ssthresh; goto drop; } - } - break; - } else { - /* - * This ack is advancing the left edge, reset the - * counter. - */ - tp->t_dupacks = 0; - /* - * If this ack also has new SACK info, increment the - * counter as per rfc6675. The variable - * sack_changed tracks all changes to the SACK - * scoreboard, including when partial ACKs without - * SACK options are received, and clear the scoreboard - * from the left side. Such partial ACKs should not be - * counted as dupacks here. - */ - if (tcp_is_sack_recovery(tp, &to) && - (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || - ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && - (tp->snd_nxt == tp->snd_max)) { - tp->t_dupacks++; - /* limit overhead by setting maxseg last */ - if (!IN_FASTRECOVERY(tp->t_flags) && - (tp->sackhint.sacked_bytes > - ((tcprexmtthresh - 1) * - (maxseg = tcp_maxseg(tp))))) { - goto enter_recovery; + tp->snd_nxt = th->th_ack; + tp->snd_cwnd = maxseg; + (void) tcp_output(tp); + KASSERT(tp->snd_limited <= 2, + ("%s: tp->snd_limited too big", + __func__)); + tp->snd_cwnd = tp->snd_ssthresh + + maxseg * + (tp->t_dupacks - tp->snd_limited); + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (V_tcp_do_rfc3042) { + /* + * Process first and second duplicate + * ACKs. Each indicates a segment + * leaving the network, creating room + * for more. Make sure we can send a + * packet on reception of each duplicate + * ACK by increasing snd_cwnd by one + * segment. Restore the original + * snd_cwnd after packet transmission. + */ + cc_ack_received(tp, th, nsegs, CC_DUPACK); + uint32_t oldcwnd = tp->snd_cwnd; + tcp_seq oldsndmax = tp->snd_max; + u_int sent; + int avail; + + KASSERT(tp->t_dupacks == 1 || + tp->t_dupacks == 2, + ("%s: dupacks not 1 or 2", + __func__)); + if (tp->t_dupacks == 1) + tp->snd_limited = 0; + if ((tp->snd_nxt == tp->snd_max) && + (tp->t_rxtshift == 0)) + tp->snd_cwnd = + SEQ_SUB(tp->snd_nxt, + tp->snd_una) - + tcp_sack_adjust(tp); + tp->snd_cwnd += + (tp->t_dupacks - tp->snd_limited) * + maxseg - tcp_sack_adjust(tp); + /* + * Only call tcp_output when there + * is new data available to be sent + * or we need to send an ACK. + */ + SOCK_SENDBUF_LOCK(so); + avail = sbavail(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); + if (tp->t_flags & TF_ACKNOW || + (avail >= + SEQ_SUB(tp->snd_nxt, tp->snd_una))) { + (void) tcp_output(tp); + } + sent = SEQ_SUB(tp->snd_max, oldsndmax); + if (sent > maxseg) { + KASSERT((tp->t_dupacks == 2 && + tp->snd_limited == 0) || + (sent == maxseg + 1 && + tp->t_flags & TF_SENTFIN) || + (sent < 2 * maxseg && + tp->t_flags & TF_NODELAY), + ("%s: sent too much: %u>%u", + __func__, sent, maxseg)); + tp->snd_limited = 2; + } else if (sent > 0) { + ++tp->snd_limited; } + tp->snd_cwnd = oldcwnd; + goto drop; } + break; } - -resume_partialack: KASSERT(SEQ_GT(th->th_ack, tp->snd_una), - ("%s: th_ack <= snd_una", __func__)); - + ("%s: SEQ_LEQ(th_ack, snd_una)", __func__)); + /* + * This ack is advancing the left edge, reset the + * counter. + */ + tp->t_dupacks = 0; + /* + * If this ack also has new SACK info, increment the + * t_dupacks as per RFC 6675. The variable + * sack_changed tracks all changes to the SACK + * scoreboard, including when partial ACKs without + * SACK options are received, and clear the scoreboard + * from the left side. Such partial ACKs should not be + * counted as dupacks here. + */ + if (V_tcp_do_newsack && + tcp_is_sack_recovery(tp, &to) && + (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || + ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && + (tp->snd_nxt == tp->snd_max)) { + tp->t_dupacks++; + /* limit overhead by setting maxseg last */ + if (!IN_FASTRECOVERY(tp->t_flags) && + (tp->sackhint.sacked_bytes > + (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) { + goto enter_recovery; + } + } /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index 8b871576d0b2..9beb80792af4 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -403,6 +403,7 @@ static const struct nlattr_parser nla_p_linfo[] = { NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo); static const struct nlattr_parser nla_p_if[] = { + { .type = IFLA_ADDRESS, .off = _OUT(ifla_address), .cb = nlattr_get_nla }, { .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, { .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 }, { .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 }, diff --git a/sys/netlink/route/iface_drivers.c b/sys/netlink/route/iface_drivers.c index 4bf913d9c978..21db3017df18 100644 --- a/sys/netlink/route/iface_drivers.c +++ b/sys/netlink/route/iface_drivers.c @@ -105,6 +105,24 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } } + if (lattrs->ifla_address != NULL) { + if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMAC)) { + error = if_setlladdr(ifp, + NLA_DATA(lattrs->ifla_address), + NLA_DATA_LEN(lattrs->ifla_address)); + if (error != 0) { + nlmsg_report_err_msg(npt, + "setting IFLA_ADDRESS failed with error code: %d", + error); + return (error); + } + } else { + nlmsg_report_err_msg(npt, + "Not enough privileges to set IFLA_ADDRESS"); + return (EPERM); + } + } + return (0); } diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h index b84b34461e35..41f110038b54 100644 --- a/sys/netlink/route/route_var.h +++ b/sys/netlink/route/route_var.h @@ -69,6 +69,7 @@ struct nl_parsed_link { char *ifla_cloner; char *ifla_ifalias; struct nlattr *ifla_idata; + struct nlattr *ifla_address; unsigned short ifi_type; int ifi_index; uint32_t ifla_link; diff --git a/tests/sys/netpfil/pf/mbuf.sh b/tests/sys/netpfil/pf/mbuf.sh index e3f138bb73b9..3abae65203cd 100644 --- a/tests/sys/netpfil/pf/mbuf.sh +++ b/tests/sys/netpfil/pf/mbuf.sh @@ -69,22 +69,22 @@ inet_in_mbuf_len_body() # Should still work for m_len=0 jexec alcatraz pfilctl link -i dummymbuf:inet inet jexec alcatraz sysctl net.dummymbuf.rules="inet in ${epair}b pull-head 0;" - atf_check_equal "0" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '0' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=1 jexec alcatraz sysctl net.dummymbuf.rules="inet in ${epair}b pull-head 1;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=19 # provided IPv4 basic header is 20 bytes long, it should impact the dst addr jexec alcatraz sysctl net.dummymbuf.rules="inet in ${epair}b pull-head 19;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' } inet_in_mbuf_len_cleanup() { @@ -140,22 +140,22 @@ inet6_in_mbuf_len_body() # Should still work for m_len=0 jexec alcatraz pfilctl link -i dummymbuf:inet6 inet6 jexec alcatraz sysctl net.dummymbuf.rules="inet6 in ${epair}b pull-head 0;" - atf_check_equal "0" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '0' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=1 jexec alcatraz sysctl net.dummymbuf.rules="inet6 in ${epair}b pull-head 1;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=39 # provided IPv6 basic header is 40 bytes long, it should impact the dst addr jexec alcatraz sysctl net.dummymbuf.rules="inet6 in ${epair}b pull-head 39;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 2001:db8::2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' } inet6_in_mbuf_len_cleanup() { @@ -205,29 +205,29 @@ ethernet_in_mbuf_len_body() # Should still work for m_len=0 jexec alcatraz pfilctl link -i dummymbuf:ethernet ethernet jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 0;" - atf_check_equal "0" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '0' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=1 jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 1;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=11 # for the simplest L2 Ethernet frame it should impact src field jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 11;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' # m_len=13 # provided L2 Ethernet simplest header is 14 bytes long, it should impact ethertype field jexec alcatraz sysctl net.dummymbuf.rules="ethernet in ${epair}b pull-head 13;" jexec alcatraz sysctl net.dummymbuf.hits=0 atf_check -s exit:0 -o ignore ping -c1 192.0.2.2 - atf_check_equal "1" "$(jexec alcatraz sysctl -n net.dummymbuf.hits)" + atf_check_equal '1' '$(jexec alcatraz sysctl -n net.dummymbuf.hits)' } ethernet_in_mbuf_len_cleanup() { |