diff options
Diffstat (limited to 'sys')
175 files changed, 6071 insertions, 1954 deletions
diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S index 130130b64541..877e377f892d 100644 --- a/sys/amd64/vmm/intel/vmx_support.S +++ b/sys/amd64/vmm/intel/vmx_support.S @@ -171,13 +171,11 @@ do_launch: */ movq %rsp, %rdi /* point %rdi back to 'vmxctx' */ movl $VMX_VMLAUNCH_ERROR, %eax - jmp decode_inst_error - + /* FALLTHROUGH */ decode_inst_error: movl $VM_FAIL_VALID, %r11d - jz inst_error - movl $VM_FAIL_INVALID, %r11d -inst_error: + movl $VM_FAIL_INVALID, %esi + cmovnzl %esi, %r11d movl %r11d, VMXCTX_INST_FAIL_STATUS(%rdi) /* diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 7cd5327b9f1b..5c81c6cdce3d 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -195,7 +195,7 @@ freebsd32_fetch_syscall_args(struct thread *td) register_t *ap; struct syscall_args *sa; int error, i, nap, narg; - unsigned int args[4]; + unsigned int args[6]; nap = 4; p = td->td_proc; diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index 915aeeda3c9e..c9de1fe4c391 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -2021,11 +2021,11 @@ typedef struct vdev_indirect_config { typedef struct vdev { STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */ - STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */ vdev_list_t v_children; /* children of this vdev */ const char *v_name; /* vdev name */ uint64_t v_guid; /* vdev guid */ - uint64_t v_txg; /* most recent transaction */ + uint64_t v_label; /* label instantiated from (top vdev) */ + uint64_t v_txg; /* most recent transaction (top vdev) */ uint64_t v_id; /* index in parent */ uint64_t v_psize; /* physical device capacity */ int v_ashift; /* offset to block shift */ diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c index f51838ee00d7..927f3689e2b6 100644 --- a/sys/compat/linux/linux_netlink.c +++ b/sys/compat/linux/linux_netlink.c @@ -242,24 +242,9 
@@ nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) } /* - * Translate a FreeBSD interface name to a Linux interface name. - */ -static bool -nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) -{ - char ifname[LINUX_IFNAMSIZ]; - - if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, - sizeof(ifname)) <= 0) - return (false); - return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); -} - -#define LINUX_NLA_UNHANDLED -1 -/* * Translate a FreeBSD attribute to a Linux attribute. - * Returns LINUX_NLA_UNHANDLED when the attribute is not processed - * and the caller must take care of it, otherwise the result is returned. + * Returns false when the attribute is not processed and the caller must take + * care of it. */ static int nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, @@ -271,22 +256,27 @@ nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, case NL_RTM_DELLINK: case NL_RTM_GETLINK: switch (nla->nla_type) { - case IFLA_IFNAME: - return (nlmsg_translate_ifname_nla(nla, nw)); + case IFLA_IFNAME: { + char ifname[LINUX_IFNAMSIZ]; + + if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, + sizeof(ifname)) > 0) + return (true); + break; + } default: break; } default: break; } - return (LINUX_NLA_UNHANDLED); + return (false); } static bool nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) { struct nlattr *nla; - int ret; int hdrlen = NETLINK_ALIGN(raw_hdrlen); int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; @@ -297,15 +287,12 @@ nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) if (nla->nla_len < sizeof(struct nlattr)) { return (false); } - ret = nlmsg_translate_all_nla(hdr, nla, nw); - if (ret == LINUX_NLA_UNHANDLED) - ret = nlmsg_copy_nla(nla, nw); - if (!ret) + if (!nlmsg_translate_all_nla(hdr, nla, nw) && + !nlmsg_copy_nla(nla, nw)) return (false); } return (true); } -#undef LINUX_NLA_UNHANDLED static unsigned int 
rtnl_if_flags_to_linux(unsigned int if_flags) @@ -563,22 +550,15 @@ nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) } } -static bool -nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) +static struct nl_buf * +nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp) { - struct nl_buf *nb, *orig; - u_int offset, msglen, orig_messages; + struct nl_writer nw; + u_int offset, msglen; - RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, - nw->buf->datalen, nw->num_messages); - - orig = nw->buf; - nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT); - if (__predict_false(nb == NULL)) - return (false); - nw->buf = nb; - orig_messages = nw->num_messages; - nw->num_messages = 0; + if (__predict_false(!nl_writer_unicast(&nw, + orig->datalen + SCRATCH_BUFFER_SIZE, nlp, false))) + return (NULL); /* Assume correct headers. Buffer IS mutable */ for (offset = 0; @@ -587,22 +567,18 @@ nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; msglen = NLMSG_ALIGN(hdr->nlmsg_len); - if (!nlmsg_to_linux(hdr, nlp, nw)) { + if (!nlmsg_to_linux(hdr, nlp, &nw)) { RT_LOG(LOG_DEBUG, "failed to process msg type %d", hdr->nlmsg_type); - nl_buf_free(nb); - nw->buf = orig; - nw->num_messages = orig_messages; - return (false); + nl_buf_free(nw.buf); + return (NULL); } } - MPASS(nw->num_messages == orig_messages); - MPASS(nw->buf == nb); - nl_buf_free(orig); - RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset); + RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, + nw.buf->datalen, nw.num_messages); - return (true); + return (nw.buf); } static struct linux_netlink_provider linux_netlink_v1 = { diff --git a/sys/compat/linuxkpi/common/include/linux/ieee80211.h b/sys/compat/linuxkpi/common/include/linux/ieee80211.h index 3644ef80861b..b9161c586d07 100644 --- a/sys/compat/linuxkpi/common/include/linux/ieee80211.h +++ 
b/sys/compat/linuxkpi/common/include/linux/ieee80211.h @@ -35,6 +35,7 @@ #include <asm/unaligned.h> #include <linux/kernel.h> #include <linux/bitops.h> +#include <linux/bitfield.h> #include <linux/if_ether.h> /* linux_80211.c */ @@ -121,7 +122,20 @@ enum ieee80211_rate_control_changed_flags { /* 802.11-2016, 9.4.2.158.3 Supported VHT-MCS and NSS Set field. */ #define IEEE80211_VHT_EXT_NSS_BW_CAPABLE (1 << 13) /* part of tx_highest */ -#define IEEE80211_VHT_MAX_AMPDU_1024K 7 /* 9.4.2.56.3 A-MPDU Parameters field, Table 9-163 */ +/* + * 802.11-2020, 9.4.2.157.2 VHT Capabilities Information field, + * Table 9-271-Subfields of the VHT Capabilities Information field (continued). + */ +enum ieee80211_vht_max_ampdu_len_exp { + IEEE80211_VHT_MAX_AMPDU_8K = 0, + IEEE80211_VHT_MAX_AMPDU_16K = 1, + IEEE80211_VHT_MAX_AMPDU_32K = 2, + IEEE80211_VHT_MAX_AMPDU_64K = 3, + IEEE80211_VHT_MAX_AMPDU_128K = 4, + IEEE80211_VHT_MAX_AMPDU_256K = 5, + IEEE80211_VHT_MAX_AMPDU_512K = 6, + IEEE80211_VHT_MAX_AMPDU_1024K = 7, +}; #define IEEE80211_WEP_IV_LEN 3 /* net80211: IEEE80211_WEP_IVLEN */ #define IEEE80211_WEP_ICV_LEN 4 @@ -133,9 +147,9 @@ enum ieee80211_rate_control_changed_flags { enum wlan_ht_cap_sm_ps { WLAN_HT_CAP_SM_PS_STATIC = 0, - WLAN_HT_CAP_SM_PS_DYNAMIC, - WLAN_HT_CAP_SM_PS_INVALID, - WLAN_HT_CAP_SM_PS_DISABLED, + WLAN_HT_CAP_SM_PS_DYNAMIC = 1, + WLAN_HT_CAP_SM_PS_INVALID = 2, + WLAN_HT_CAP_SM_PS_DISABLED = 3 }; #define WLAN_MAX_KEY_LEN 32 diff --git a/sys/compat/linuxkpi/common/include/net/cfg80211.h b/sys/compat/linuxkpi/common/include/net/cfg80211.h index 18b34f0e90ec..239b4a5ae7b8 100644 --- a/sys/compat/linuxkpi/common/include/net/cfg80211.h +++ b/sys/compat/linuxkpi/common/include/net/cfg80211.h @@ -36,6 +36,7 @@ #include <linux/mutex.h> #include <linux/if_ether.h> #include <linux/ethtool.h> +#include <linux/debugfs.h> #include <linux/device.h> #include <linux/netdevice.h> #include <linux/random.h> @@ -56,8 +57,8 @@ extern int linuxkpi_debug_80211; #endif #define 
TODO(fmt, ...) if (linuxkpi_debug_80211 & D80211_TODO) \ printf("%s:%d: XXX LKPI80211 TODO " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) -#define IMPROVE(...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \ - printf("%s:%d: XXX LKPI80211 IMPROVE\n", __func__, __LINE__) +#define IMPROVE(fmt, ...) if (linuxkpi_debug_80211 & D80211_IMPROVE) \ + printf("%s:%d: XXX LKPI80211 IMPROVE " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) enum rfkill_hard_block_reasons { RFKILL_HARD_BLOCK_NOT_OWNER = BIT(0), @@ -127,19 +128,24 @@ struct ieee80211_txrx_stypes { uint16_t rx; }; -/* XXX net80211 has an ieee80211_channel as well. */ +/* + * net80211 has an ieee80211_channel as well; we use the linuxkpi_ version + * interally in LinuxKPI and re-define ieee80211_channel for the drivers + * at the end of the file. + */ struct linuxkpi_ieee80211_channel { - /* TODO FIXME */ - uint32_t hw_value; /* ic_ieee */ - uint32_t center_freq; /* ic_freq */ - enum ieee80211_channel_flags flags; /* ic_flags */ + uint32_t center_freq; + uint16_t hw_value; + enum ieee80211_channel_flags flags; enum nl80211_band band; - int8_t max_power; /* ic_maxpower */ bool beacon_found; - int max_antenna_gain, max_reg_power; - int orig_flags; - int dfs_cac_ms, dfs_state; - int orig_mpwr; + enum nl80211_dfs_state dfs_state; + unsigned int dfs_cac_ms; + int max_antenna_gain; + int max_power; + int max_reg_power; + uint32_t orig_flags; + int orig_mpwr; }; struct cfg80211_bitrate_mask { @@ -722,8 +728,10 @@ struct linuxkpi_ieee80211_regdomain { #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU 0x04 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY 0x08 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_32US 0x10 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US 0x10 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY 0x20 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US 0x40 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US 0x40 #define VENDOR_CMD_RAW_DATA (void *)(uintptr_t)(-ENOENT) @@ -1296,10 +1304,9 @@ 
reg_query_regdb_wmm(uint8_t *alpha2, uint32_t center_freq, struct ieee80211_reg_rule *rule) { - /* ETSI has special rules. FreeBSD regdb needs to learn about them. */ - TODO(); + IMPROVE("regdomain.xml needs to grow wmm information for at least ETSI"); - return (-ENXIO); + return (-ENODATA); } static __inline const u8 * @@ -2065,6 +2072,18 @@ nl80211_chan_width_to_mhz(enum nl80211_chan_width width) } static __inline ssize_t +wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, const char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *, struct file *, char *, size_t, void *), + void *data) +{ + TODO(); + return (-ENXIO); +} + + +static __inline ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, const char __user *userbuf, size_t count, ssize_t (*handler)(struct wiphy *, struct file *, char *, size_t, void *), diff --git a/sys/compat/linuxkpi/common/include/net/mac80211.h b/sys/compat/linuxkpi/common/include/net/mac80211.h index af3199c38939..19f7bcff29dc 100644 --- a/sys/compat/linuxkpi/common/include/net/mac80211.h +++ b/sys/compat/linuxkpi/common/include/net/mac80211.h @@ -1135,7 +1135,7 @@ extern const struct cfg80211_ops linuxkpi_mac80211cfgops; struct ieee80211_hw *linuxkpi_ieee80211_alloc_hw(size_t, const struct ieee80211_ops *); void linuxkpi_ieee80211_iffree(struct ieee80211_hw *); -void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *, char *); +void linuxkpi_set_ieee80211_dev(struct ieee80211_hw *); int linuxkpi_ieee80211_ifattach(struct ieee80211_hw *); void linuxkpi_ieee80211_ifdetach(struct ieee80211_hw *); void linuxkpi_ieee80211_unregister_hw(struct ieee80211_hw *); @@ -1184,7 +1184,7 @@ struct wireless_dev *linuxkpi_ieee80211_vif_to_wdev(struct ieee80211_vif *); void linuxkpi_ieee80211_connection_loss(struct ieee80211_vif *); void linuxkpi_ieee80211_beacon_loss(struct ieee80211_vif *); struct sk_buff 
*linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *, - uint8_t *, uint8_t *, size_t, size_t); + const uint8_t *, const uint8_t *, size_t, size_t); void linuxkpi_ieee80211_tx_status(struct ieee80211_hw *, struct sk_buff *); void linuxkpi_ieee80211_tx_status_ext(struct ieee80211_hw *, struct ieee80211_tx_status *); @@ -1255,7 +1255,7 @@ SET_IEEE80211_DEV(struct ieee80211_hw *hw, struct device *dev) { set_wiphy_dev(hw->wiphy, dev); - linuxkpi_set_ieee80211_dev(hw, dev_name(dev)); + linuxkpi_set_ieee80211_dev(hw); IMPROVE(); } @@ -1741,12 +1741,15 @@ ieee80211_request_smps(struct ieee80211_vif *vif, u_int link_id, "SMPS_STATIC", "SMPS_DYNAMIC", "SMPS_AUTOMATIC", - "SMPS_NUM_MODES" }; - if (linuxkpi_debug_80211 & D80211_TODO) - printf("%s:%d: XXX LKPI80211 TODO smps %d %s\n", - __func__, __LINE__, smps, smps_mode_name[smps]); + if (vif->type != NL80211_IFTYPE_STATION) + return; + + if (smps >= nitems(smps_mode_name)) + panic("%s: unsupported smps value: %d\n", __func__, smps); + + IMPROVE("XXX LKPI80211 TODO smps %d %s\n", smps, smps_mode_name[smps]); } static __inline void @@ -2161,8 +2164,8 @@ ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } static __inline struct sk_buff * -ieee80211_probereq_get(struct ieee80211_hw *hw, uint8_t *addr, - uint8_t *ssid, size_t ssid_len, size_t tailroom) +ieee80211_probereq_get(struct ieee80211_hw *hw, const uint8_t *addr, + const uint8_t *ssid, size_t ssid_len, size_t tailroom) { return (linuxkpi_ieee80211_probereq_get(hw, addr, ssid, ssid_len, diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index 1d00e8da8f9a..500c1c1d52eb 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -274,48 +274,40 @@ lkpi_nl80211_sta_info_to_str(struct sbuf *s, const char *prefix, sbuf_printf(s, "\n"); } -static int -lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) +static void +lkpi_80211_dump_lvif_stas(struct lkpi_vif 
*lvif, struct sbuf *s) { struct lkpi_hw *lhw; struct ieee80211_hw *hw; struct ieee80211vap *vap; - struct lkpi_vif *lvif; struct ieee80211_vif *vif; struct lkpi_sta *lsta; struct ieee80211_sta *sta; struct station_info sinfo; - struct sbuf s; int error; - if (req->newptr) - return (EPERM); - - lvif = (struct lkpi_vif *)arg1; vif = LVIF_TO_VIF(lvif); vap = LVIF_TO_VAP(lvif); lhw = vap->iv_ic->ic_softc; hw = LHW_TO_HW(lhw); - sbuf_new_for_sysctl(&s, NULL, 1024, req); - wiphy_lock(hw->wiphy); list_for_each_entry(lsta, &lvif->lsta_list, lsta_list) { sta = LSTA_TO_STA(lsta); - sbuf_putc(&s, '\n'); - sbuf_printf(&s, "lsta %p sta %p added_to_drv %d\n", lsta, sta, lsta->added_to_drv); + sbuf_putc(s, '\n'); + sbuf_printf(s, "lsta %p sta %p added_to_drv %d\n", lsta, sta, lsta->added_to_drv); memset(&sinfo, 0, sizeof(sinfo)); error = lkpi_80211_mo_sta_statistics(hw, vif, sta, &sinfo); if (error == EEXIST) /* Not added to driver. */ continue; if (error == ENOTSUPP) { - sbuf_printf(&s, " sta_statistics not supported\n"); + sbuf_printf(s, " sta_statistics not supported\n"); continue; } if (error != 0) { - sbuf_printf(&s, " sta_statistics failed: %d\n", error); + sbuf_printf(s, " sta_statistics failed: %d\n", error); continue; } @@ -325,51 +317,76 @@ lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) memcpy(&sinfo.rxrate, &lsta->sinfo.rxrate, sizeof(sinfo.rxrate)); sinfo.filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } + /* If no CHAIN_SIGNAL is reported, try to fill it in from the lsta sinfo. 
*/ + if ((sinfo.filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) == 0 && + (lsta->sinfo.filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) != 0) { + sinfo.chains = lsta->sinfo.chains; + memcpy(sinfo.chain_signal, lsta->sinfo.chain_signal, + sizeof(sinfo.chain_signal)); + sinfo.filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); + } - lkpi_nl80211_sta_info_to_str(&s, " nl80211_sta_info (valid fields)", sinfo.filled); - sbuf_printf(&s, " connected_time %u inactive_time %u\n", + lkpi_nl80211_sta_info_to_str(s, " nl80211_sta_info (valid fields)", sinfo.filled); + sbuf_printf(s, " connected_time %u inactive_time %u\n", sinfo.connected_time, sinfo.inactive_time); - sbuf_printf(&s, " rx_bytes %ju rx_packets %u rx_dropped_misc %u\n", + sbuf_printf(s, " rx_bytes %ju rx_packets %u rx_dropped_misc %u\n", (uintmax_t)sinfo.rx_bytes, sinfo.rx_packets, sinfo.rx_dropped_misc); - sbuf_printf(&s, " rx_duration %ju rx_beacon %u rx_beacon_signal_avg %d\n", + sbuf_printf(s, " rx_duration %ju rx_beacon %u rx_beacon_signal_avg %d\n", (uintmax_t)sinfo.rx_duration, sinfo.rx_beacon, (int8_t)sinfo.rx_beacon_signal_avg); - sbuf_printf(&s, " tx_bytes %ju tx_packets %u tx_failed %u\n", + sbuf_printf(s, " tx_bytes %ju tx_packets %u tx_failed %u\n", (uintmax_t)sinfo.tx_bytes, sinfo.tx_packets, sinfo.tx_failed); - sbuf_printf(&s, " tx_duration %ju tx_retries %u\n", + sbuf_printf(s, " tx_duration %ju tx_retries %u\n", (uintmax_t)sinfo.tx_duration, sinfo.tx_retries); - sbuf_printf(&s, " signal %d signal_avg %d ack_signal %d avg_ack_signal %d\n", + sbuf_printf(s, " signal %d signal_avg %d ack_signal %d avg_ack_signal %d\n", sinfo.signal, sinfo.signal_avg, sinfo.ack_signal, sinfo.avg_ack_signal); - - sbuf_printf(&s, " generation %d assoc_req_ies_len %zu chains %d\n", + sbuf_printf(s, " generation %d assoc_req_ies_len %zu chains %#04x\n", sinfo.generation, sinfo.assoc_req_ies_len, sinfo.chains); - for (int i = 0; i < sinfo.chains && i < IEEE80211_MAX_CHAINS; i++) { - sbuf_printf(&s, " chain[%d] signal %d 
signal_avg %d\n", + for (int i = 0; i < nitems(sinfo.chain_signal) && i < IEEE80211_MAX_CHAINS; i++) { + if (!(sinfo.chains & BIT(i))) + continue; + sbuf_printf(s, " chain[%d] signal %d signal_avg %d\n", i, (int8_t)sinfo.chain_signal[i], (int8_t)sinfo.chain_signal_avg[i]); } /* assoc_req_ies, bss_param, sta_flags */ - sbuf_printf(&s, " rxrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", + sbuf_printf(s, " rxrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", sinfo.rxrate.flags, CFG80211_RATE_INFO_FLAGS_BITS, sinfo.rxrate.bw, lkpi_rate_info_bw_to_str(sinfo.rxrate.bw), sinfo.rxrate.legacy * 100, sinfo.rxrate.mcs, sinfo.rxrate.nss); - sbuf_printf(&s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", + sbuf_printf(s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", sinfo.rxrate.he_dcm, sinfo.rxrate.he_gi, sinfo.rxrate.he_ru_alloc, sinfo.rxrate.eht_gi); - sbuf_printf(&s, " txrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", + sbuf_printf(s, " txrate: flags %b bw %u(%s) legacy %u kbit/s mcs %u nss %u\n", sinfo.txrate.flags, CFG80211_RATE_INFO_FLAGS_BITS, sinfo.txrate.bw, lkpi_rate_info_bw_to_str(sinfo.txrate.bw), sinfo.txrate.legacy * 100, sinfo.txrate.mcs, sinfo.txrate.nss); - sbuf_printf(&s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", + sbuf_printf(s, " he_dcm %u he_gi %u he_ru_alloc %u eht_gi %u\n", sinfo.txrate.he_dcm, sinfo.txrate.he_gi, sinfo.txrate.he_ru_alloc, sinfo.txrate.eht_gi); } wiphy_unlock(hw->wiphy); +} + +static int +lkpi_80211_dump_stas(SYSCTL_HANDLER_ARGS) +{ + struct lkpi_vif *lvif; + struct sbuf s; + + if (req->newptr) + return (EPERM); + + lvif = (struct lkpi_vif *)arg1; + + sbuf_new_for_sysctl(&s, NULL, 1024, req); + + lkpi_80211_dump_lvif_stas(lvif, &s); sbuf_finish(&s); sbuf_delete(&s); @@ -3826,8 +3843,10 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], IMPROVE(); + wiphy_lock(hw->wiphy); error = lkpi_80211_mo_start(hw); if (error != 0) { + wiphy_unlock(hw->wiphy); ic_printf(ic, "%s: 
failed to start hw: %d\n", __func__, error); mtx_destroy(&lvif->mtx); free(lvif, M_80211_VAP); @@ -3837,11 +3856,13 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], error = lkpi_80211_mo_add_interface(hw, vif); if (error != 0) { IMPROVE(); /* XXX-BZ mo_stop()? */ + wiphy_unlock(hw->wiphy); ic_printf(ic, "%s: failed to add interface: %d\n", __func__, error); mtx_destroy(&lvif->mtx); free(lvif, M_80211_VAP); return (NULL); } + wiphy_unlock(hw->wiphy); LKPI_80211_LHW_LVIF_LOCK(lhw); TAILQ_INSERT_TAIL(&lhw->lvif_head, lvif, lvif_entry); @@ -3873,10 +3894,12 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], /* Force MC init. */ lkpi_update_mcast_filter(ic, true); - IMPROVE(); - ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); + /* Now we have a valid vap->iv_ifp. Any checksum offloading goes below. */ + + IMPROVE(); + /* Override with LinuxKPI method so we can drive mac80211/cfg80211. */ lvif->iv_newstate = vap->iv_newstate; vap->iv_newstate = lkpi_iv_newstate; @@ -5571,6 +5594,12 @@ lkpi_ic_ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap return (-ENXIO); } + if (lsta->state != IEEE80211_STA_AUTHORIZED) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p state %d not AUTHORIZED\n", + __func__, lsta, ni, vap, sta, lsta->state); + return (-ENXIO); + } + params.sta = sta; params.action = IEEE80211_AMPDU_RX_START; params.buf_size = _IEEE80211_MASKSHIFT(le16toh(baparamset), IEEE80211_BAPS_BUFSIZ); @@ -5647,13 +5676,35 @@ lkpi_ic_ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) lvif = VAP_TO_LVIF(vap); vif = LVIF_TO_VIF(lvif); lsta = ni->ni_drv_data; + if (lsta == NULL) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, lsta is NULL\n", + __func__, lsta, ni, vap); + goto net80211_only; + } sta = LSTA_TO_STA(lsta); + if (!lsta->added_to_drv) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p not added to firmware\n", + __func__, lsta, ni, vap, sta); + goto net80211_only; 
+ } + + if (lsta->state != IEEE80211_STA_AUTHORIZED) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p state %d not AUTHORIZED\n", + __func__, lsta, ni, vap, sta, lsta->state); + goto net80211_only; + } + IMPROVE_HT("This really should be passed from ht_recv_action_ba_delba."); for (tid = 0; tid < WME_NUM_TID; tid++) { if (&ni->ni_rx_ampdu[tid] == rap) break; } + if (tid == WME_NUM_TID) { + ic_printf(ic, "%s: lsta %p ni %p vap %p, sta %p TID not found\n", + __func__, lsta, ni, vap, sta); + goto net80211_only; + } params.sta = sta; params.action = IEEE80211_AMPDU_RX_STOP; @@ -6014,17 +6065,30 @@ linuxkpi_ieee80211_iffree(struct ieee80211_hw *hw) } void -linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw, char *name) +linuxkpi_set_ieee80211_dev(struct ieee80211_hw *hw) { struct lkpi_hw *lhw; struct ieee80211com *ic; + struct device *dev; lhw = HW_TO_LHW(hw); ic = lhw->ic; - /* Now set a proper name before ieee80211_ifattach(). */ + /* Save the backpointer from net80211 to LinuxKPI. */ ic->ic_softc = lhw; - ic->ic_name = name; + + /* + * Set a proper name before ieee80211_ifattach() if dev is set. + * ath1xk also unset the dev so we need to check. + */ + dev = wiphy_dev(hw->wiphy); + if (dev != NULL) { + ic->ic_name = dev_name(dev); + } else { + TODO("adjust arguments to still have the old dev or go through " + "the hoops of getting the bsddev from hw and detach; " + "or do in XXX; check ath1kx drivers"); + } /* XXX-BZ do we also need to set wiphy name? */ } @@ -6332,8 +6396,10 @@ linuxkpi_ieee80211_ifattach(struct ieee80211_hw *hw) hw->wiphy->max_scan_ie_len -= lhw->scan_ie_len; } - if (bootverbose) + if (bootverbose) { + ic_printf(ic, "netdev_features %b\n", hw->netdev_features, NETIF_F_BITS); ieee80211_announce(ic); + } return (0); err: @@ -6832,9 +6898,17 @@ lkpi_convert_rx_status(struct ieee80211_hw *hw, struct lkpi_sta *lsta, rx_stats->c_pktflags |= IEEE80211_RX_F_FAIL_FCSCRC; #endif + /* Fill in some sinfo bits to fill gaps not reported byt the driver. 
*/ if (lsta != NULL) { memcpy(&lsta->sinfo.rxrate, &rxrate, sizeof(rxrate)); lsta->sinfo.filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); + + if (rx_status->chains != 0) { + lsta->sinfo.chains = rx_status->chains; + memcpy(lsta->sinfo.chain_signal, rx_status->chain_signal, + sizeof(lsta->sinfo.chain_signal)); + lsta->sinfo.filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); + } } } @@ -7668,8 +7742,8 @@ linuxkpi_ieee80211_queue_work(struct ieee80211_hw *hw, } struct sk_buff * -linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *hw, uint8_t *addr, - uint8_t *ssid, size_t ssid_len, size_t tailroom) +linuxkpi_ieee80211_probereq_get(struct ieee80211_hw *hw, const uint8_t *addr, + const uint8_t *ssid, size_t ssid_len, size_t tailroom) { struct sk_buff *skb; struct ieee80211_frame *wh; diff --git a/sys/compat/linuxkpi/common/src/linux_80211_macops.c b/sys/compat/linuxkpi/common/src/linux_80211_macops.c index 78b2120f2d8c..1046b753574f 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211_macops.c +++ b/sys/compat/linuxkpi/common/src/linux_80211_macops.c @@ -53,6 +53,8 @@ lkpi_80211_mo_start(struct ieee80211_hw *hw) struct lkpi_hw *lhw; int error; + lockdep_assert_wiphy(hw->wiphy); + lhw = HW_TO_LHW(hw); if (lhw->ops->start == NULL) { error = EOPNOTSUPP; diff --git a/sys/conf/files b/sys/conf/files index 8bb14bd3d953..d89813c70355 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3451,7 +3451,6 @@ dev/virtio/mmio/virtio_mmio.c optional virtio_mmio dev/virtio/mmio/virtio_mmio_acpi.c optional virtio_mmio acpi dev/virtio/mmio/virtio_mmio_cmdline.c optional virtio_mmio dev/virtio/mmio/virtio_mmio_fdt.c optional virtio_mmio fdt -dev/virtio/mmio/virtio_mmio_if.m optional virtio_mmio dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/block/virtio_blk.c optional virtio_blk diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 80548320c3fc..c12ab9db030a 100644 --- a/sys/conf/files.amd64 +++ 
b/sys/conf/files.amd64 @@ -13,14 +13,14 @@ include "conf/files.x86" # elf-vdso.so.o standard \ dependency "$S/amd64/amd64/sigtramp.S assym.inc $S/conf/vdso_amd64.ldscript $S/tools/amd64_vdso.sh" \ - compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' DEBUG='${DEBUG}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_vdso.sh" \ + compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_vdso.sh" \ no-ctfconvert \ no-implicit-rule before-depend \ clean "elf-vdso.so.o elf-vdso.so.1 vdso_offsets.h sigtramp.pico" # elf-vdso32.so.o optional compat_freebsd32 \ dependency "$S/amd64/ia32/ia32_sigtramp.S ia32_assym.h $S/conf/vdso_amd64_ia32.ldscript $S/tools/amd64_ia32_vdso.sh" \ - compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' DEBUG='${DEBUG}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_ia32_vdso.sh" \ + compile-with "env AWK='${AWK}' NM='${NM}' LD='${LD}' CC='${CC}' OBJCOPY='${OBJCOPY}' ELFDUMP='${ELFDUMP}' S='${S}' sh $S/tools/amd64_ia32_vdso.sh" \ no-ctfconvert \ no-implicit-rule before-depend \ clean "elf-vdso32.so.o elf-vdso32.so.1 vdso_ia32_offsets.h ia32_sigtramp.pico" @@ -419,6 +419,9 @@ contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S optional zfs com contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S optional zfs compile-with "${ZFS_S}" contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S optional zfs compile-with "${ZFS_S}" +# zfs AVX2 implementation of aes-gcm from BoringSSL +contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S optional zfs compile-with "${ZFS_S}" + # zfs sha2 hash support zfs-sha256-x86_64.o optional zfs \ dependency "$S/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S" \ diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 0e4ffd92724e..bb3c7af82a4d 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -372,6 +372,19 @@ 
_ILINKS+= x86 _ILINKS+= i386 .endif +.if ${MK_REPRODUCIBLE_BUILD} != "no" +PREFIX_SYSDIR=/usr/src/sys +PREFIX_OBJDIR=/usr/obj/usr/src/${MACHINE}.${MACHINE_CPUARCH}/sys/${KERN_IDENT} +CFLAGS+= -ffile-prefix-map=${SYSDIR}=${PREFIX_SYSDIR} +CFLAGS+= -ffile-prefix-map=${.OBJDIR}=${PREFIX_OBJDIR} +.if defined(SYSROOT) +CFLAGS+= -ffile-prefix-map=${SYSROOT}=/sysroot +.endif +.else +PREFIX_SYSDIR=${SYSDIR} +PREFIX_OBJDIR=${.OBJDIR} +.endif + # Ensure that the link exists without depending on it when it exists. # Ensure that debug info references the path in the source tree. .for _link in ${_ILINKS} @@ -379,9 +392,9 @@ _ILINKS+= i386 ${SRCS} ${DEPENDOBJS}: ${_link} .endif .if ${_link} == "machine" -CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include +CFLAGS+= -fdebug-prefix-map=./machine=${PREFIX_SYSDIR}/${MACHINE}/include .else -CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include +CFLAGS+= -fdebug-prefix-map=./${_link}=${PREFIX_SYSDIR}/${_link}/include .endif .endfor @@ -454,7 +467,7 @@ config.o env.o hints.o vers.o vnode_if.o: NEWVERS_ENV+= MAKE="${MAKE}" .if ${MK_REPRODUCIBLE_BUILD} != "no" -NEWVERS_ARGS+= -R +NEWVERS_ARGS+= -R -d ${PREFIX_OBJDIR} .endif vers.c: .NOMETA_CMP $S/conf/newvers.sh $S/sys/param.h ${SYSTEM_DEP:Nvers.*} ${NEWVERS_ENV} sh $S/conf/newvers.sh ${NEWVERS_ARGS} ${KERN_IDENT} diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 78178065e15b..1fcfd6467e7f 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -214,7 +214,8 @@ ZFS_CFLAGS+= -I$S/contrib/openzfs/module/icp/include \ .if ${MACHINE_ARCH} == "amd64" ZFS_CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \ - -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW + -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW \ + -DHAVE_VAES -DHAVE_VPCLMULQDQ .endif .if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \ diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk index 
645c04cdd135..0fd2d4050cf1 100644 --- a/sys/conf/kmod.mk +++ b/sys/conf/kmod.mk @@ -303,6 +303,25 @@ all: ${PROG} beforedepend: ${_ILINKS} beforebuild: ${_ILINKS} +.if ${MK_REPRODUCIBLE_BUILD} != "no" +PREFIX_SYSDIR=/usr/src/sys +CFLAGS+= -ffile-prefix-map=${SYSDIR}=${PREFIX_SYSDIR} +.if defined(KERNBUILDDIR) +PREFIX_KERNBUILDDIR=/usr/obj/usr/src/${MACHINE}.${MACHINE_CPUARCH}/sys/${KERNBUILDDIR:T} +PREFIX_OBJDIR=${PREFIX_KERNBUILDDIR}/modules/usr/src/sys/modules/${.OBJDIR:T} +CFLAGS+= -ffile-prefix-map=${KERNBUILDDIR}=${PREFIX_KERNBUILDDIR} +.else +PREFIX_OBJDIR=/usr/obj/usr/src/${MACHINE}.${MACHINE_CPUARCH}/sys/modules/${.OBJDIR:T} +.endif +CFLAGS+= -ffile-prefix-map=${.OBJDIR}=${PREFIX_OBJDIR} +.if defined(SYSROOT) +CFLAGS+= -ffile-prefix-map=${SYSROOT}=/sysroot +.endif +.else +PREFIX_SYSDIR=${SYSDIR} +PREFIX_OBJDIR=${.OBJDIR} +.endif + # Ensure that the links exist without depending on it when it exists which # causes all the modules to be rebuilt when the directory pointed to changes. # Ensure that debug info references the path in the source tree. @@ -311,9 +330,9 @@ beforebuild: ${_ILINKS} OBJS_DEPEND_GUESS+= ${_link} .endif .if ${_link} == "machine" -CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include +CFLAGS+= -fdebug-prefix-map=./machine=${PREFIX_SYSDIR}/${MACHINE}/include .else -CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include +CFLAGS+= -fdebug-prefix-map=./${_link}=${PREFIX_SYSDIR}/${_link}/include .endif .endfor diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh index 66926805052c..8b60da95741e 100644 --- a/sys/conf/newvers.sh +++ b/sys/conf/newvers.sh @@ -110,14 +110,18 @@ COPYRIGHT="$COPYRIGHT # We expand include_metadata later since we may set it to the # future value of modified. 
+builddir=$(pwd) include_metadata=yes modified=no -while getopts crRvV: opt; do +while getopts cd:rRvV: opt; do case "$opt" in c) echo "$COPYRIGHT" exit 0 ;; + d) + builddir=$OPTARG + ;; r) include_metadata=no ;; @@ -187,7 +191,7 @@ fi touch version v=$(cat version) u=${USER:-root} -d=$(pwd) +d=$builddir h=${HOSTNAME:-$(hostname)} if [ -n "$SOURCE_DATE_EPOCH" ]; then if ! t=$(date -ur $SOURCE_DATE_EPOCH 2>/dev/null); then diff --git a/sys/conf/options b/sys/conf/options index bb3d08e88e21..4009ba2b4843 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -235,11 +235,6 @@ TCP_REQUEST_TRK opt_global.h TCP_ACCOUNTING opt_global.h TCP_BBR opt_inet.h TCP_RACK opt_inet.h -# -# TCP SaD Detection is an experimental Sack attack Detection (SaD) -# algorithm that uses "normal" behaviour with SACK's to detect -# a possible attack. It is strictly experimental at this point. -# TURNSTILE_PROFILING UMTX_PROFILING UMTX_CHAINS opt_global.h diff --git a/sys/contrib/dev/acpica/changes.txt b/sys/contrib/dev/acpica/changes.txt index 435540b254f1..4e3cf4f2f41c 100644 --- a/sys/contrib/dev/acpica/changes.txt +++ b/sys/contrib/dev/acpica/changes.txt @@ -1,11 +1,29 @@ ---------------------------------------- +7 August 2025. Summary of changes for version 20250807: + +Major changes: + +Added option to skip the global lock for SMM - Huacai Chen + +Fixed non-NUL terminated string implementations - Ahmed Salem + +Fixed CCEL and CDAT templates - Ahmed Salem + +Fixed a major Linux kernel bug (UAF) that was triggered by unequal number of method parameters (definition) vs arguments (invocation) in different places - Peter Williams, Hans de Goede, Rafael Wysocki + +Define distinct D3 states (D3Hot and D3Cold) that help clarify the device behavior support - Aymeric Wibo + +A few cleanups, improvements to existing table supports, small fixes, spelling corrections etc. + + +---------------------------------------- 4 April 2025. 
Summary of changes for version 20250404: Major changes: Update all the copyright continuation year to 2025 in the license header of all files -Add complete support for 3 new ACPI tables ? MRRM,ERDT and RIMT (Tony Luck & V L Sunil) +Add complete support for 3 new ACPI tables - MRRM,ERDT and RIMT (Tony Luck & V L Sunil) Add a license file to the project which is a great improvement (Dionna Glaze) @@ -21,11 +39,11 @@ Major changes: Fix 2 critical CVE addressing memory leaks - Seunghun Han -EINJ V2 updates ? Zaid Alali (Ampere Computing) +EINJ V2 updates - Zaid Alali (Ampere Computing) -CDAT updates ? Ira Weiny (Intel Corporation) +CDAT updates - Ira Weiny (Intel Corporation) -Fix mutex handling, don?t release ones that were never acquired ? Daniil Tatianin +Fix mutex handling, do not release ones that were never acquired - Daniil Tatianin Experiment with new tag name format Ryyyy_mm_dd to solve chronological sorting problems @@ -39,7 +57,7 @@ Fix the acpixf.h file which caused issues for the last release (before this) 202 Fix the pointer offset for the SLIC table -Verify the local environment and GitHub commits are all in sync which was a problem with the second from last release (before this)20240322 (aka 20240323 – date issue) +Verify the local environment and GitHub commits are all in sync which was a problem with the second from last release (before this)20240322 (aka 20240323 - date issue) diff --git a/sys/contrib/dev/acpica/common/adisasm.c b/sys/contrib/dev/acpica/common/adisasm.c index 96cd6c7f5d3c..83125098cbd1 100644 --- a/sys/contrib/dev/acpica/common/adisasm.c +++ b/sys/contrib/dev/acpica/common/adisasm.c @@ -481,12 +481,12 @@ AdDisassembleOneTable ( "FieldName : FieldValue (in hex)\n */\n\n"); AcpiDmDumpDataTable (Table); - fprintf (stderr, "Acpi Data Table [%4.4s] decoded\n", + fprintf (stdout, "Acpi Data Table [%4.4s] decoded\n", AcpiGbl_CDAT ? 
(char *) AcpiGbl_CDAT : Table->Signature); if (File) { - fprintf (stderr, "Formatted output: %s - %u bytes\n", + fprintf (stdout, "Formatted output: %s - %u bytes\n", DisasmFilename, CmGetFileSize (File)); } @@ -584,16 +584,16 @@ AdDisassembleOneTable ( AcpiDmDumpDataTable (Table); - fprintf (stderr, "Disassembly completed\n"); + fprintf (stdout, "Disassembly completed\n"); if (File) { - fprintf (stderr, "ASL Output: %s - %u bytes\n", + fprintf (stdout, "ASL Output: %s - %u bytes\n", DisasmFilename, CmGetFileSize (File)); } if (AslGbl_MapfileFlag) { - fprintf (stderr, "%14s %s - %u bytes\n", + fprintf (stdout, "%14s %s - %u bytes\n", AslGbl_FileDescs[ASL_FILE_MAP_OUTPUT].ShortDescription, AslGbl_Files[ASL_FILE_MAP_OUTPUT].Filename, FlGetFileSize (ASL_FILE_MAP_OUTPUT)); @@ -630,7 +630,7 @@ AdReparseOneTable ( ACPI_COMMENT_ADDR_NODE *AddrListHead; - fprintf (stderr, + fprintf (stdout, "\nFound %u external control methods, " "reparsing with new information\n", AcpiDmGetUnresolvedExternalMethodCount ()); diff --git a/sys/contrib/dev/acpica/common/ahtable.c b/sys/contrib/dev/acpica/common/ahtable.c index 898b2d09f609..587bf61016f0 100644 --- a/sys/contrib/dev/acpica/common/ahtable.c +++ b/sys/contrib/dev/acpica/common/ahtable.c @@ -265,6 +265,7 @@ const AH_TABLE AcpiGbl_SupportedTables[] = {ACPI_SIG_SSDT, "Secondary System Description Table (AML table)"}, {ACPI_SIG_STAO, "Status Override Table"}, {ACPI_SIG_SVKL, "Storage Volume Key Location Table"}, + {ACPI_SIG_SWFT, "SoundWire File Table"}, {ACPI_SIG_TCPA, "Trusted Computing Platform Alliance Table"}, {ACPI_SIG_TDEL, "TD-Event Log Table"}, {ACPI_SIG_TPM2, "Trusted Platform Module hardware interface Table"}, diff --git a/sys/contrib/dev/acpica/common/dmtable.c b/sys/contrib/dev/acpica/common/dmtable.c index fcff97a304ae..702f4f7965e4 100644 --- a/sys/contrib/dev/acpica/common/dmtable.c +++ b/sys/contrib/dev/acpica/common/dmtable.c @@ -721,6 +721,7 @@ const ACPI_DMTABLE_DATA AcpiDmTableData[] = {ACPI_SIG_SRAT, NULL, 
AcpiDmDumpSrat, DtCompileSrat, TemplateSrat}, {ACPI_SIG_STAO, NULL, AcpiDmDumpStao, DtCompileStao, TemplateStao}, {ACPI_SIG_SVKL, AcpiDmTableInfoSvkl, AcpiDmDumpSvkl, DtCompileSvkl, TemplateSvkl}, + {ACPI_SIG_SWFT, NULL, NULL, NULL, NULL}, {ACPI_SIG_TCPA, NULL, AcpiDmDumpTcpa, DtCompileTcpa, TemplateTcpa}, {ACPI_SIG_TDEL, AcpiDmTableInfoTdel, NULL, NULL, TemplateTdel}, {ACPI_SIG_TPM2, AcpiDmTableInfoTpm2, AcpiDmDumpTpm2, DtCompileTpm2, TemplateTpm2}, diff --git a/sys/contrib/dev/acpica/common/dmtbdump2.c b/sys/contrib/dev/acpica/common/dmtbdump2.c index 822920d2ea94..d29a60be0f67 100644 --- a/sys/contrib/dev/acpica/common/dmtbdump2.c +++ b/sys/contrib/dev/acpica/common/dmtbdump2.c @@ -2637,7 +2637,7 @@ AcpiDmDumpRhct ( RhctIsaString, RhctIsaString->IsaLength, AcpiDmTableInfoRhctIsa1); if (Subtable->Length > IsaPadOffset) { - Status = AcpiDmDumpTable (Table->Length, Offset + SubtableOffset, + Status = AcpiDmDumpTable (Table->Length, Offset + IsaPadOffset, ACPI_ADD_PTR (UINT8, Subtable, IsaPadOffset), (Subtable->Length - IsaPadOffset), AcpiDmTableInfoRhctIsaPad); } diff --git a/sys/contrib/dev/acpica/common/dmtbinfo2.c b/sys/contrib/dev/acpica/common/dmtbinfo2.c index 9ecf877fcfb0..b7c6d3b8d536 100644 --- a/sys/contrib/dev/acpica/common/dmtbinfo2.c +++ b/sys/contrib/dev/acpica/common/dmtbinfo2.c @@ -2180,7 +2180,7 @@ ACPI_DMTABLE_INFO AcpiDmTableInfoRhct[] = ACPI_DMTABLE_INFO AcpiDmTableInfoRhctNodeHdr[] = { {ACPI_DMT_RHCT, ACPI_RHCTH_OFFSET (Type), "Subtable Type", 0}, - {ACPI_DMT_UINT16, ACPI_RHCTH_OFFSET (Length), "Length", 0}, + {ACPI_DMT_UINT16, ACPI_RHCTH_OFFSET (Length), "Length", DT_LENGTH}, {ACPI_DMT_UINT16, ACPI_RHCTH_OFFSET (Revision), "Revision", 0}, ACPI_DMT_TERMINATOR }; diff --git a/sys/contrib/dev/acpica/common/dmtbinfo3.c b/sys/contrib/dev/acpica/common/dmtbinfo3.c index 75b580e0d890..0935fc86aff9 100644 --- a/sys/contrib/dev/acpica/common/dmtbinfo3.c +++ b/sys/contrib/dev/acpica/common/dmtbinfo3.c @@ -200,7 +200,7 @@ ACPI_DMTABLE_INFO 
AcpiDmTableInfoCcel[] = { {ACPI_DMT_UINT8, ACPI_CCEL_OFFSET (CCType), "CC Type", 0}, {ACPI_DMT_UINT8, ACPI_CCEL_OFFSET (CCSubType), "CC Sub Type", 0}, - {ACPI_DMT_UINT32, ACPI_CCEL_OFFSET (Reserved), "Reserved", 0}, + {ACPI_DMT_UINT16, ACPI_CCEL_OFFSET (Reserved), "Reserved", 0}, {ACPI_DMT_UINT64, ACPI_CCEL_OFFSET (LogAreaMinimumLength), "Log Area Minimum Length", 0}, {ACPI_DMT_UINT64, ACPI_CCEL_OFFSET (LogAreaStartAddress), "Log Area Start Address", 0}, ACPI_DMT_TERMINATOR diff --git a/sys/contrib/dev/acpica/compiler/aslanalyze.c b/sys/contrib/dev/acpica/compiler/aslanalyze.c index 17e2674817a9..625611a630de 100644 --- a/sys/contrib/dev/acpica/compiler/aslanalyze.c +++ b/sys/contrib/dev/acpica/compiler/aslanalyze.c @@ -572,10 +572,22 @@ ApCheckForGpeNameConflict ( ACPI_PARSE_OBJECT *NextOp; UINT32 GpeNumber; char Name[ACPI_NAMESEG_SIZE + 1]; - char Target[ACPI_NAMESEG_SIZE]; + char Target[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; - /* Need a null-terminated string version of NameSeg */ + /** + * Need a null-terminated string version of NameSeg + * + * NOTE: during a review on Name[ACPI_NAMESEG_SIZE + 1] having an extra + * byte[1], compiler testing exhibited a difference in behavior between + * GCC and Clang[2] (at least; MSVC may also exhibit the same) in + * how optimization is done. The extra byte is needed to ensure + * the signature does not get mangled, subsequently avoiding + * GpeNumber being a completely different return value from strtoul. 
+ * + * [1] https://github.com/acpica/acpica/pull/1019#discussion_r2058687704 + * [2] https://github.com/acpica/acpica/pull/1019#discussion_r2061953039 + */ ACPI_MOVE_32_TO_32 (Name, Op->Asl.NameSeg); Name[ACPI_NAMESEG_SIZE] = 0; diff --git a/sys/contrib/dev/acpica/compiler/aslrestype2s.c b/sys/contrib/dev/acpica/compiler/aslrestype2s.c index 096862290384..f47402d4e025 100644 --- a/sys/contrib/dev/acpica/compiler/aslrestype2s.c +++ b/sys/contrib/dev/acpica/compiler/aslrestype2s.c @@ -1469,7 +1469,7 @@ RsDoCsi2SerialBusDescriptor ( case 2: /* Local Port Instance [Integer] (_PRT) */ - RsSetFlagBits16 ((UINT16 *) &Descriptor->Csi2SerialBus.TypeSpecificFlags, InitializerOp, 0, 0); + RsSetFlagBits16 ((UINT16 *) &Descriptor->Csi2SerialBus.TypeSpecificFlags, InitializerOp, 2, 0); RsCreateMultiBitField (InitializerOp, ACPI_RESTAG_LOCALPORT, CurrentByteOffset + ASL_RESDESC_OFFSET (Csi2SerialBus.TypeSpecificFlags), 2, 6); break; diff --git a/sys/contrib/dev/acpica/compiler/dttable2.c b/sys/contrib/dev/acpica/compiler/dttable2.c index 6203a382ad62..754880346299 100644 --- a/sys/contrib/dev/acpica/compiler/dttable2.c +++ b/sys/contrib/dev/acpica/compiler/dttable2.c @@ -1929,24 +1929,30 @@ DtCompileRhct ( { ACPI_STATUS Status; ACPI_RHCT_NODE_HEADER *RhctHeader; - ACPI_RHCT_HART_INFO *RhctHartInfo = NULL; + ACPI_RHCT_HART_INFO *RhctHartInfo; DT_SUBTABLE *Subtable; DT_SUBTABLE *ParentTable; ACPI_DMTABLE_INFO *InfoTable; DT_FIELD **PFieldList = (DT_FIELD **) List; DT_FIELD *SubtableStart; + ACPI_TABLE_RHCT *Table; + BOOLEAN FirstNode = TRUE; /* Compile the main table */ + ParentTable = DtPeekSubtable (); Status = DtCompileTable (PFieldList, AcpiDmTableInfoRhct, &Subtable); if (ACPI_FAILURE (Status)) { return (Status); } + DtInsertSubtable (ParentTable, Subtable); + Table = ACPI_CAST_PTR (ACPI_TABLE_RHCT, ParentTable->Buffer); + Table->NodeCount = 0; + Table->NodeOffset = sizeof (ACPI_TABLE_RHCT); - ParentTable = DtPeekSubtable (); while (*PFieldList) { SubtableStart = *PFieldList; 
@@ -1961,7 +1967,10 @@ DtCompileRhct ( } DtInsertSubtable (ParentTable, Subtable); RhctHeader = ACPI_CAST_PTR (ACPI_RHCT_NODE_HEADER, Subtable->Buffer); - RhctHeader->Length = (UINT16)(Subtable->Length); + + DtPushSubtable (Subtable); + ParentTable = DtPeekSubtable (); + Table->NodeCount++; switch (RhctHeader->Type) { @@ -1999,37 +2008,54 @@ DtCompileRhct ( return (Status); } DtInsertSubtable (ParentTable, Subtable); - RhctHeader->Length += (UINT16)(Subtable->Length); + if (FirstNode) + { + Table->NodeOffset = ACPI_PTR_DIFF(ParentTable->Buffer, Table); + FirstNode = FALSE; + } /* Compile RHCT subtable additionals */ switch (RhctHeader->Type) { - case ACPI_RHCT_NODE_TYPE_HART_INFO: + case ACPI_RHCT_NODE_TYPE_ISA_STRING: - RhctHartInfo = ACPI_SUB_PTR (ACPI_RHCT_HART_INFO, - Subtable->Buffer, sizeof (ACPI_RHCT_NODE_HEADER)); - if (RhctHartInfo) + /* + * Padding - Variable-length data + * Optionally allows the padding of the ISA string to be used + * for filling this field. + */ + Status = DtCompileTable (PFieldList, AcpiDmTableInfoRhctIsaPad, + &Subtable); + if (ACPI_FAILURE (Status)) + { + return (Status); + } + if (Subtable) { + DtInsertSubtable (ParentTable, Subtable); + } + break; - RhctHartInfo->NumOffsets = 0; - while (*PFieldList) - { - Status = DtCompileTable (PFieldList, - AcpiDmTableInfoRhctHartInfo2, &Subtable); - if (ACPI_FAILURE (Status)) - { - return (Status); - } - if (!Subtable) - { - break; - } + case ACPI_RHCT_NODE_TYPE_HART_INFO: - DtInsertSubtable (ParentTable, Subtable); - RhctHeader->Length += (UINT16)(Subtable->Length); - RhctHartInfo->NumOffsets++; + RhctHartInfo = ACPI_CAST_PTR (ACPI_RHCT_HART_INFO, + Subtable->Buffer); + RhctHartInfo->NumOffsets = 0; + while (*PFieldList) + { + Status = DtCompileTable (PFieldList, + AcpiDmTableInfoRhctHartInfo2, &Subtable); + if (ACPI_FAILURE (Status)) + { + return (Status); } + if (!Subtable) + { + break; + } + DtInsertSubtable (ParentTable, Subtable); + RhctHartInfo->NumOffsets++; } break; @@ -2037,6 
+2063,9 @@ DtCompileRhct ( break; } + + DtPopSubtable (); + ParentTable = DtPeekSubtable (); } return (AE_OK); diff --git a/sys/contrib/dev/acpica/compiler/dttemplate.c b/sys/contrib/dev/acpica/compiler/dttemplate.c index 67b13bb82d1b..d7140712d4e6 100644 --- a/sys/contrib/dev/acpica/compiler/dttemplate.c +++ b/sys/contrib/dev/acpica/compiler/dttemplate.c @@ -255,7 +255,7 @@ DtCreateTemplates ( if (AcpiGbl_Optind < 3) { - fprintf (stderr, "Creating default template: [DSDT]\n"); + fprintf (stdout, "Creating default template: [DSDT]\n"); Status = DtCreateOneTemplateFile (ACPI_SIG_DSDT, 0); goto Exit; } @@ -411,7 +411,7 @@ DtCreateAllTemplates ( ACPI_STATUS Status; - fprintf (stderr, "Creating all supported Template files\n"); + fprintf (stdout, "Creating all supported Template files\n"); /* Walk entire ACPI table data structure */ @@ -421,8 +421,13 @@ DtCreateAllTemplates ( if (TableData->Template) { - Status = DtCreateOneTemplate (TableData->Signature, - 0, TableData); + if (ACPI_COMPARE_NAMESEG (TableData->Signature, ACPI_SIG_CDAT)) + /* Special handling of CDAT */ + Status = DtCreateOneTemplate (TableData->Signature, + 0, NULL); + else + Status = DtCreateOneTemplate (TableData->Signature, + 0, TableData); if (ACPI_FAILURE (Status)) { return (Status); @@ -563,7 +568,7 @@ DtCreateOneTemplate ( } else { - /* Special ACPI tables - DSDT, SSDT, OSDT, FACS, RSDP */ + /* Special ACPI tables - DSDT, SSDT, OSDT, FACS, RSDP, CDAT */ AcpiOsPrintf (" (AML byte code table)\n"); AcpiOsPrintf (" */\n"); @@ -621,6 +626,11 @@ DtCreateOneTemplate ( AcpiDmDumpDataTable (ACPI_CAST_PTR (ACPI_TABLE_HEADER, TemplateRsdp)); } + else if (ACPI_COMPARE_NAMESEG (Signature, ACPI_SIG_CDAT)) + { + AcpiDmDumpCdat (ACPI_CAST_PTR (ACPI_TABLE_HEADER, + TemplateCdat)); + } else { fprintf (stderr, @@ -632,14 +642,14 @@ DtCreateOneTemplate ( if (TableCount == 0) { - fprintf (stderr, + fprintf (stdout, "Created ACPI table template for [%4.4s], " "written to \"%s\"\n", Signature, DisasmFilename); } else 
{ - fprintf (stderr, + fprintf (stdout, "Created ACPI table templates for [%4.4s] " "and %u [SSDT] in same file, written to \"%s\"\n", Signature, TableCount, DisasmFilename); diff --git a/sys/contrib/dev/acpica/compiler/dttemplate.h b/sys/contrib/dev/acpica/compiler/dttemplate.h index 0fdd90f73a23..51a34be5c36b 100644 --- a/sys/contrib/dev/acpica/compiler/dttemplate.h +++ b/sys/contrib/dev/acpica/compiler/dttemplate.h @@ -389,7 +389,7 @@ const unsigned char TemplateBoot[] = const unsigned char TemplateCcel[] = { 0x43,0x43,0x45,0x4C,0x38,0x00,0x00,0x00, /* 00000000 "CCEL8..." */ - 0x04,0x1C,0x49,0x4E,0x54,0x45,0x4C,0x20, /* 00000008 "..INTEL " */ + 0x04,0x2E,0x49,0x4E,0x54,0x45,0x4C,0x20, /* 00000008 "..INTEL " */ 0x54,0x65,0x6D,0x70,0x6C,0x61,0x74,0x65, /* 00000010 "Template" */ 0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ 0x30,0x09,0x21,0x20,0x00,0x00,0x00,0x00, /* 00000020 "0.! ...." */ @@ -1951,25 +1951,25 @@ const unsigned char TemplateRgrt[] = const unsigned char TemplateRhct[] = { - 0x52,0x48,0x43,0x54,0x96,0x00,0x00,0x00, /* 00000000 "RHCT|..." */ - 0x01,0x24,0x4F,0x45,0x4D,0x43,0x41,0x00, /* 00000008 "..OEMCA." */ + 0x52,0x48,0x43,0x54,0x96,0x00,0x00,0x00, /* 00000000 "RHCT...." */ + 0x01,0x6D,0x4F,0x45,0x4D,0x43,0x41,0x00, /* 00000008 ".mOEMCA." */ 0x54,0x45,0x4D,0x50,0x4C,0x41,0x54,0x45, /* 00000010 "TEMPLATE" */ 0x01,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ - 0x28,0x09,0x22,0x20,0x00,0x00,0x00,0x00, /* 00000020 "... ...." */ + 0x04,0x04,0x25,0x20,0x00,0x00,0x00,0x00, /* 00000020 "..% ...." */ 0x80,0x96,0x98,0x00,0x00,0x00,0x00,0x00, /* 00000028 "........" */ - 0x02,0x00,0x00,0x00,0x38,0x00,0x00,0x00, /* 00000030 "....8..." */ - 0x00,0x00,0x34,0x00,0x01,0x00,0x2B,0x00, /* 00000038 "..4...*." */ + 0x04,0x00,0x00,0x00,0x38,0x00,0x00,0x00, /* 00000030 "....8..." */ + 0x00,0x00,0x34,0x00,0x01,0x00,0x2B,0x00, /* 00000038 "..4...+." 
*/ 0x72,0x76,0x36,0x34,0x69,0x6D,0x61,0x66, /* 00000040 "rv64imaf" */ 0x64,0x63,0x68,0x5F,0x7A,0x69,0x63,0x73, /* 00000048 "dch_zics" */ 0x72,0x5F,0x7A,0x69,0x66,0x65,0x6E,0x63, /* 00000050 "r_zifenc" */ 0x65,0x69,0x5F,0x7A,0x62,0x61,0x5F,0x7A, /* 00000058 "ei_zba_z" */ 0x62,0x62,0x5F,0x7A,0x62,0x63,0x5F,0x7A, /* 00000060 "bb_zbc_z" */ - 0x62,0x73,0x00,0x00,0xFF,0xFF,0x18,0x00, /* 00000068 "bs......" */ - 0x01,0x00,0x03,0x00,0x00,0x00,0x00,0x00, /* 00000070 "........" */ - 0x38,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, /* 00000078 "........" */ - 0x8E,0x00,0x00,0x00,0x01,0x00,0x0A,0x00, /* 00000080 "........" */ - 0x01,0x00,0x00,0x06,0x06,0x06,0x02,0x00, /* 00000088 "........" */ - 0x08,0x00,0x01,0x00,0x00,0x02 /* 00000090 "........" */ + 0x62,0x73,0x00,0x00,0x01,0x00,0x0A,0x00, /* 00000068 "bs......" */ + 0x01,0x00,0x00,0x06,0x06,0x06,0x02,0x00, /* 00000070 "........" */ + 0x08,0x00,0x01,0x00,0x00,0x02,0xFF,0xFF, /* 00000078 "........" */ + 0x18,0x00,0x01,0x00,0x03,0x00,0x00,0x00, /* 00000080 "........" */ + 0x00,0x00,0x3B,0x00,0x00,0x00,0x6C,0x00, /* 00000088 "..;...l." */ + 0x00,0x00,0x76,0x00,0x00,0x00 /* 00000090 "..v..." 
*/ }; const unsigned char TemplateRimt[] = diff --git a/sys/contrib/dev/acpica/compiler/dtutils.c b/sys/contrib/dev/acpica/compiler/dtutils.c index f2463f74b8fc..18ea18cefdd6 100644 --- a/sys/contrib/dev/acpica/compiler/dtutils.c +++ b/sys/contrib/dev/acpica/compiler/dtutils.c @@ -623,6 +623,7 @@ DtGetFieldLength ( case ACPI_DMT_NFIT: case ACPI_DMT_PCI_PATH: case ACPI_DMT_PHAT: + case ACPI_DMT_RHCT: ByteLength = 2; break; diff --git a/sys/contrib/dev/acpica/components/disassembler/dmresrcl2.c b/sys/contrib/dev/acpica/components/disassembler/dmresrcl2.c index dd8cf4889885..551cf8178d94 100644 --- a/sys/contrib/dev/acpica/components/disassembler/dmresrcl2.c +++ b/sys/contrib/dev/acpica/components/disassembler/dmresrcl2.c @@ -778,7 +778,7 @@ AcpiDmCsi2SerialBusDescriptor ( AcpiOsPrintf (" 0x%2.2X, 0x%2.2X,\n", Resource->Csi2SerialBus.TypeSpecificFlags & 0x03, - Resource->Csi2SerialBus.TypeSpecificFlags & 0xFC); + (Resource->Csi2SerialBus.TypeSpecificFlags & 0xFC) >> 2); /* ResourceSource is a required field */ diff --git a/sys/contrib/dev/acpica/components/dispatcher/dsmethod.c b/sys/contrib/dev/acpica/components/dispatcher/dsmethod.c index 8b6efc070b1b..becdb95f8b83 100644 --- a/sys/contrib/dev/acpica/components/dispatcher/dsmethod.c +++ b/sys/contrib/dev/acpica/components/dispatcher/dsmethod.c @@ -646,8 +646,6 @@ AcpiDsCallControlMethod ( ACPI_WALK_STATE *NextWalkState = NULL; ACPI_OPERAND_OBJECT *ObjDesc; ACPI_EVALUATE_INFO *Info; - UINT32 i; - ACPI_FUNCTION_TRACE_PTR (DsCallControlMethod, ThisWalkState); @@ -670,6 +668,23 @@ AcpiDsCallControlMethod ( return_ACPI_STATUS (AE_NULL_OBJECT); } + if (ThisWalkState->NumOperands < ObjDesc->Method.ParamCount) + { + ACPI_ERROR ((AE_INFO, "Missing argument(s) for method [%4.4s]", + AcpiUtGetNodeName (MethodNode))); + + return_ACPI_STATUS (AE_AML_TOO_FEW_ARGUMENTS); + } + + else if (ThisWalkState->NumOperands > ObjDesc->Method.ParamCount) + { + ACPI_ERROR ((AE_INFO, "Too many arguments for method [%4.4s]", + AcpiUtGetNodeName 
(MethodNode))); + + return_ACPI_STATUS (AE_AML_TOO_MANY_ARGUMENTS); + } + + /* Init for new method, possibly wait on method mutex */ Status = AcpiDsBeginMethodExecution ( @@ -726,15 +741,7 @@ AcpiDsCallControlMethod ( * Delete the operands on the previous walkstate operand stack * (they were copied to new objects) */ - for (i = 0; i < ObjDesc->Method.ParamCount; i++) - { - AcpiUtRemoveReference (ThisWalkState->Operands [i]); - ThisWalkState->Operands [i] = NULL; - } - - /* Clear the operand stack */ - - ThisWalkState->NumOperands = 0; + AcpiDsClearOperands (ThisWalkState); ACPI_DEBUG_PRINT ((ACPI_DB_DISPATCH, "**** Begin nested execution of [%4.4s] **** WalkState=%p\n", diff --git a/sys/contrib/dev/acpica/components/dispatcher/dsmthdat.c b/sys/contrib/dev/acpica/components/dispatcher/dsmthdat.c index 42e1aa505d02..2c45e8c91f57 100644 --- a/sys/contrib/dev/acpica/components/dispatcher/dsmthdat.c +++ b/sys/contrib/dev/acpica/components/dispatcher/dsmthdat.c @@ -357,6 +357,7 @@ AcpiDsMethodDataInitArgs ( Index++; } + AcpiExTraceArgs(Params, Index); ACPI_DEBUG_PRINT ((ACPI_DB_EXEC, "%u args passed to method\n", Index)); return_ACPI_STATUS (AE_OK); diff --git a/sys/contrib/dev/acpica/components/events/evglock.c b/sys/contrib/dev/acpica/components/events/evglock.c index 872e7b499a8f..395ca14fb315 100644 --- a/sys/contrib/dev/acpica/components/events/evglock.c +++ b/sys/contrib/dev/acpica/components/events/evglock.c @@ -195,6 +195,11 @@ AcpiEvInitGlobalLockHandler ( return_ACPI_STATUS (AE_OK); } + if (!AcpiGbl_UseGlobalLock) + { + return_ACPI_STATUS (AE_OK); + } + /* Attempt installation of the global lock handler */ Status = AcpiInstallFixedEventHandler (ACPI_EVENT_GLOBAL, diff --git a/sys/contrib/dev/acpica/components/executer/extrace.c b/sys/contrib/dev/acpica/components/executer/extrace.c index 0eceb0ffccb1..d54d4908ca65 100644 --- a/sys/contrib/dev/acpica/components/executer/extrace.c +++ b/sys/contrib/dev/acpica/components/executer/extrace.c @@ -269,6 +269,68 @@ 
AcpiExGetTraceEventName ( #endif +/******************************************************************************* + * + * FUNCTION: AcpiExTraceArgs + * + * PARAMETERS: Params - AML method arguments + * Count - numer of method arguments + * + * RETURN: None + * + * DESCRIPTION: Trace any arguments + * + ******************************************************************************/ + +void +AcpiExTraceArgs(ACPI_OPERAND_OBJECT **Params, UINT32 Count) +{ + UINT32 i; + + ACPI_FUNCTION_NAME(ExTraceArgs); + + for (i = 0; i < Count; i++) + { + ACPI_OPERAND_OBJECT *obj_desc = Params[i]; + + if (!i) + { + ACPI_DEBUG_PRINT((ACPI_DB_TRACE_POINT, " ")); + } + + switch (obj_desc->Common.Type) + { + case ACPI_TYPE_INTEGER: + ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "%lx", obj_desc->Integer.Value)); + break; + + case ACPI_TYPE_STRING: + if (!obj_desc->String.Length) + { + ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "NULL")); + break; + } + if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_TRACE_POINT, _COMPONENT)) + { + AcpiUtPrintString(obj_desc->String.Pointer, ACPI_UINT8_MAX); + } + break; + + default: + ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "Unknown")); + break; + } + + if ((i + 1) == Count) + { + ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, "\n")); + } + else + { + ACPI_DEBUG_PRINT_RAW((ACPI_DB_TRACE_POINT, ", ")); + } + } +} /******************************************************************************* * @@ -299,9 +361,9 @@ AcpiExTracePoint ( if (Pathname) { ACPI_DEBUG_PRINT ((ACPI_DB_TRACE_POINT, - "%s %s [0x%p:%s] execution.\n", + "%s %s [%s] execution.\n", AcpiExGetTraceEventName (Type), Begin ? 
"Begin" : "End", - Aml, Pathname)); + Pathname)); } else { diff --git a/sys/contrib/dev/acpica/components/parser/psopinfo.c b/sys/contrib/dev/acpica/components/parser/psopinfo.c index 21c2b831ef24..1db32f4e8246 100644 --- a/sys/contrib/dev/acpica/components/parser/psopinfo.c +++ b/sys/contrib/dev/acpica/components/parser/psopinfo.c @@ -180,8 +180,8 @@ const ACPI_OPCODE_INFO * AcpiPsGetOpcodeInfo ( UINT16 Opcode) { -#ifdef ACPI_DEBUG_OUTPUT - const char *OpcodeName = "Unknown AML opcode"; +#if defined ACPI_ASL_COMPILER && defined ACPI_DEBUG_OUTPUT + const char *OpcodeName = "Unknown AML opcode"; #endif ACPI_FUNCTION_NAME (PsGetOpcodeInfo); @@ -207,7 +207,7 @@ AcpiPsGetOpcodeInfo ( #if defined ACPI_ASL_COMPILER && defined ACPI_DEBUG_OUTPUT #include <contrib/dev/acpica/compiler/asldefine.h> - + switch (Opcode) { case AML_RAW_DATA_BYTE: @@ -249,12 +249,12 @@ AcpiPsGetOpcodeInfo ( default: break; } -#endif /* Unknown AML opcode */ ACPI_DEBUG_PRINT ((ACPI_DB_EXEC, "%s [%4.4X]\n", OpcodeName, Opcode)); +#endif return (&AcpiGbl_AmlOpInfo [_UNK]); } diff --git a/sys/contrib/dev/acpica/components/tables/tbprint.c b/sys/contrib/dev/acpica/components/tables/tbprint.c index 7211673c42a2..8b812a890a07 100644 --- a/sys/contrib/dev/acpica/components/tables/tbprint.c +++ b/sys/contrib/dev/acpica/components/tables/tbprint.c @@ -279,6 +279,14 @@ AcpiTbPrintTableHeader ( ACPI_CAST_PTR (ACPI_TABLE_RSDP, Header)->Revision, LocalHeader.OemId)); } + else if (AcpiGbl_CDAT && !AcpiUtValidNameseg (Header->Signature)) + { + /* CDAT does not use the common ACPI table header */ + + ACPI_INFO (("%-4.4s 0x%8.8X%8.8X %06X", + ACPI_SIG_CDAT, ACPI_FORMAT_UINT64 (Address), + ACPI_CAST_PTR (ACPI_TABLE_CDAT, Header)->Length)); + } else { /* Standard ACPI table with full common header */ diff --git a/sys/contrib/dev/acpica/components/utilities/utnonansi.c b/sys/contrib/dev/acpica/components/utilities/utnonansi.c index bfbe1194ceae..f8b3a29e3283 100644 --- 
a/sys/contrib/dev/acpica/components/utilities/utnonansi.c +++ b/sys/contrib/dev/acpica/components/utilities/utnonansi.c @@ -353,7 +353,7 @@ AcpiUtSafeStrncpy ( { /* Always terminate destination string */ - memcpy (Dest, Source, DestSize); + strncpy (Dest, Source, DestSize); Dest[DestSize - 1] = 0; } diff --git a/sys/contrib/dev/acpica/include/acdebug.h b/sys/contrib/dev/acpica/include/acdebug.h index e335752148b9..63f39051a8ac 100644 --- a/sys/contrib/dev/acpica/include/acdebug.h +++ b/sys/contrib/dev/acpica/include/acdebug.h @@ -187,7 +187,7 @@ typedef struct acpi_db_execute_walk { UINT32 Count; UINT32 MaxCount; - char NameSeg[ACPI_NAMESEG_SIZE + 1] ACPI_NONSTRING; + char NameSeg[ACPI_NAMESEG_SIZE + 1]; } ACPI_DB_EXECUTE_WALK; diff --git a/sys/contrib/dev/acpica/include/acexcep.h b/sys/contrib/dev/acpica/include/acexcep.h index 57f98ab4540f..7216e0d49148 100644 --- a/sys/contrib/dev/acpica/include/acexcep.h +++ b/sys/contrib/dev/acpica/include/acexcep.h @@ -322,8 +322,11 @@ typedef struct acpi_exception_info #define AE_AML_TARGET_TYPE EXCEP_AML (0x0023) #define AE_AML_PROTOCOL EXCEP_AML (0x0024) #define AE_AML_BUFFER_LENGTH EXCEP_AML (0x0025) +#define AE_AML_TOO_FEW_ARGUMENTS EXCEP_AML (0x0026) +#define AE_AML_TOO_MANY_ARGUMENTS EXCEP_AML (0x0027) -#define AE_CODE_AML_MAX 0x0025 + +#define AE_CODE_AML_MAX 0x0027 /* @@ -456,7 +459,9 @@ static const ACPI_EXCEPTION_INFO AcpiGbl_ExceptionNames_Aml[] = EXCEP_TXT ("AE_AML_UNINITIALIZED_NODE", "A namespace node is uninitialized or unresolved"), EXCEP_TXT ("AE_AML_TARGET_TYPE", "A target operand of an incorrect type was encountered"), EXCEP_TXT ("AE_AML_PROTOCOL", "Violation of a fixed ACPI protocol"), - EXCEP_TXT ("AE_AML_BUFFER_LENGTH", "The length of the buffer is invalid/incorrect") + EXCEP_TXT ("AE_AML_BUFFER_LENGTH", "The length of the buffer is invalid/incorrect"), + EXCEP_TXT ("AE_AML_TOO_FEW_ARGUMENTS", "There are fewer than expected method arguments"), + EXCEP_TXT ("AE_AML_TOO_MANY_ARGUMENTS", "There are too 
many arguments for this method") }; static const ACPI_EXCEPTION_INFO AcpiGbl_ExceptionNames_Ctrl[] = diff --git a/sys/contrib/dev/acpica/include/acinterp.h b/sys/contrib/dev/acpica/include/acinterp.h index 74166384f172..b7f9e8f615e4 100644 --- a/sys/contrib/dev/acpica/include/acinterp.h +++ b/sys/contrib/dev/acpica/include/acinterp.h @@ -280,6 +280,10 @@ AcpiExTracePoint ( UINT8 *Aml, char *Pathname); +void +AcpiExTraceArgs( + ACPI_OPERAND_OBJECT **Params, + UINT32 Count); /* * exfield - ACPI AML (p-code) execution - field manipulation diff --git a/sys/contrib/dev/acpica/include/acpixf.h b/sys/contrib/dev/acpica/include/acpixf.h index 193b0e6a70dc..b5961e21bb9b 100644 --- a/sys/contrib/dev/acpica/include/acpixf.h +++ b/sys/contrib/dev/acpica/include/acpixf.h @@ -154,7 +154,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20250404 +#define ACPI_CA_VERSION 0x20250807 #include <contrib/dev/acpica/include/acconfig.h> #include <contrib/dev/acpica/include/actypes.h> @@ -358,6 +358,12 @@ ACPI_INIT_GLOBAL (UINT8, AcpiGbl_OsiData, 0); ACPI_INIT_GLOBAL (BOOLEAN, AcpiGbl_ReducedHardware, FALSE); /* + * ACPI Global Lock is mainly used for systems with SMM, so no-SMM systems + * (such as LoongArch) may not have and not use Global Lock. + */ +ACPI_INIT_GLOBAL (BOOLEAN, AcpiGbl_UseGlobalLock, TRUE); + +/* * Maximum timeout for While() loop iterations before forced method abort. * This mechanism is intended to prevent infinite loops during interpreter * execution within a host kernel. 
diff --git a/sys/contrib/dev/acpica/include/actbl.h b/sys/contrib/dev/acpica/include/actbl.h index eafd5d8a0f8b..ae52bd452c90 100644 --- a/sys/contrib/dev/acpica/include/actbl.h +++ b/sys/contrib/dev/acpica/include/actbl.h @@ -220,7 +220,7 @@ typedef struct acpi_table_header char OemId[ACPI_OEM_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM identification */ char OemTableId[ACPI_OEM_TABLE_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM table identification */ UINT32 OemRevision; /* OEM revision number */ - char AslCompilerId[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */ + char AslCompilerId[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; /* ASCII ASL compiler vendor ID */ UINT32 AslCompilerRevision; /* ASL compiler version */ } ACPI_TABLE_HEADER; diff --git a/sys/contrib/dev/acpica/include/actbl1.h b/sys/contrib/dev/acpica/include/actbl1.h index 876b721068c6..ec04f0a0ab9f 100644 --- a/sys/contrib/dev/acpica/include/actbl1.h +++ b/sys/contrib/dev/acpica/include/actbl1.h @@ -262,7 +262,7 @@ typedef struct acpi_whea_header /* Larger subtable header (when Length can exceed 255) */ -typedef struct acpi_subtable_header_16 +typedef struct acpi_subtbl_hdr_16 { UINT16 Type; UINT16 Length; diff --git a/sys/contrib/dev/acpica/include/actbl2.h b/sys/contrib/dev/acpica/include/actbl2.h index 4899929b2d45..a74b6d555a3a 100644 --- a/sys/contrib/dev/acpica/include/actbl2.h +++ b/sys/contrib/dev/acpica/include/actbl2.h @@ -201,6 +201,7 @@ #define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ #define ACPI_SIG_SVKL "SVKL" /* Storage Volume Key Location Table */ +#define ACPI_SIG_SWFT "SWFT" /* SoundWire File Table */ #define ACPI_SIG_TDEL "TDEL" /* TD Event Log Table */ @@ -4094,6 +4095,30 @@ enum acpi_svkl_format ACPI_SVKL_FORMAT_RESERVED = 1 /* 1 and greater are reserved */ }; +/******************************************************************************* + * + * SWFT - SoundWire File Table + * as described in Discovery 
and Configuration (DisCo) Specification + * for SoundWire® + * Version 1 + * + ******************************************************************************/ + +typedef struct acpi_table_swft +{ + ACPI_TABLE_HEADER Header; /* Common ACPI table header */ + +} ACPI_TABLE_SWFT; + +typedef struct acpi_swft_file +{ + UINT16 VendorID; + UINT32 FileID; + UINT16 FileVersion; + UINT16 FileLength; + UINT8 FileData[]; + +} ACPI_SWFT_FILE; /******************************************************************************* * diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 885a64037f89..62e06926e268 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -25,6 +25,10 @@ UBMIRROR="https://cloud-images.ubuntu.com" # default nic model for vm's NIC="virtio" +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in almalinux8) OSNAME="AlmaLinux 8" @@ -61,6 +65,14 @@ case "$OS" in OSNAME="Debian 12" URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" ;; + debian13) + OSNAME="Debian 13" + # TODO: Overwrite OSv to debian13 for virt-install until it's added to osinfo + OSv="debian12" + URL="https://cloud.debian.org/images/cloud/trixie/latest/debian-13-generic-amd64.qcow2" + OPTS[0]="--boot" + OPTS[1]="uefi=on" + ;; fedora41) OSNAME="Fedora 41" OSv="fedora-unknown" @@ -109,7 +121,7 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-CURRENT" + FreeBSD="15.0-PRERELEASE" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" @@ -242,7 +254,7 @@ sudo virt-install \ --network bridge=virbr0,model=$NIC,mac='52:54:00:83:79:00' \ --cloud-init user-data=/tmp/user-data \ --disk $DISK,bus=virtio,cache=none,format=raw,driver.discard=unmap \ - 
--import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} >/dev/null # Give the VMs hostnames so we don't have to refer to them with # hardcoded IP addresses. diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh index c41ecd09d52e..ee058b488088 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -41,7 +41,7 @@ function debian() { libelf-dev libffi-dev libmount-dev libpam0g-dev libselinux-dev libssl-dev \ libtool libtool-bin libudev-dev libunwind-dev linux-headers-$(uname -r) \ lsscsi nfs-kernel-server pamtester parted python3 python3-all-dev \ - python3-cffi python3-dev python3-distlib python3-packaging \ + python3-cffi python3-dev python3-distlib python3-packaging libtirpc-dev \ python3-setuptools python3-sphinx qemu-guest-agent rng-tools rpm2cpio \ rsync samba sysstat uuid-dev watchdog wget xfslibs-dev xxhash zlib1g-dev echo "##[endgroup]" diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh index 17e976ebcc39..2807d9e77127 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh @@ -5,12 +5,13 @@ # # Usage: # -# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--poweroff] -# [--release][--repo][--tarball] +# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--patch-level NUM] +# [--poweroff][--release][--repo][--tarball] # # OS: OS name like 'fedora41' # --enable-debug: Build RPMs with '--enable-debug' (for testing) # --dkms: Build DKMS RPMs as well +# --patch-level NUM: Use a custom patch level number for packages. 
# --poweroff: Power-off the VM after building # --release Build zfs-release*.rpm as well # --repo After building everything, copy RPMs into /tmp/repo @@ -21,6 +22,7 @@ ENABLE_DEBUG="" DKMS="" +PATCH_LEVEL="" POWEROFF="" RELEASE="" REPO="" @@ -35,6 +37,11 @@ while [[ $# -gt 0 ]]; do DKMS=1 shift ;; + --patch-level) + PATCH_LEVEL=$2 + shift + shift + ;; --poweroff) POWEROFF=1 shift @@ -215,6 +222,10 @@ function rpm_build_and_install() { run ./autogen.sh echo "##[endgroup]" + if [ -n "$PATCH_LEVEL" ] ; then + sed -i -E 's/(Release:\s+)1/\1'$PATCH_LEVEL'/g' META + fi + echo "##[group]Configure" run ./configure --enable-debuginfo $extra echo "##[endgroup]" @@ -328,7 +339,13 @@ fi # almalinux9.5 # fedora42 source /etc/os-release -sudo hostname "$ID$VERSION_ID" + if which hostnamectl &> /dev/null ; then + # Fedora 42+ use hostnamectl + sudo hostnamectl set-hostname "$ID$VERSION_ID" + sudo hostnamectl set-hostname --pretty "$ID$VERSION_ID" +else + sudo hostname "$ID$VERSION_ID" +fi # save some sysinfo uname -a > /var/tmp/uname.txt diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh index 6bf10024a1a6..0adcad2a99bc 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-5-setup.sh @@ -12,16 +12,26 @@ source /var/tmp/env.txt # wait for poweroff to succeed PID=$(pidof /usr/bin/qemu-system-x86_64) tail --pid=$PID -f /dev/null -sudo virsh undefine openzfs +sudo virsh undefine --nvram openzfs # cpu pinning CPUSET=("0,1" "2,3") +# additional options for virt-install +OPTS[0]="" +OPTS[1]="" + case "$OS" in freebsd*) # FreeBSD needs only 6GiB RAM=6 ;; + debian13) + RAM=8 + # Boot Debian 13 with uefi=on and secureboot=off (ZFS Kernel Module not signed) + OPTS[0]="--boot" + OPTS[1]="firmware=efi,firmware.feature0.name=secure-boot,firmware.feature0.enabled=no" + ;; *) # Linux needs more memory, but can be optimized to share 
it via KSM RAM=8 @@ -79,7 +89,7 @@ EOF --network bridge=virbr0,model=$NIC,mac="52:54:00:83:79:0$i" \ --disk $DISK-system,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ --disk $DISK-tests,bus=virtio,cache=none,format=$FORMAT,driver.discard=unmap \ - --import --noautoconsole >/dev/null + --import --noautoconsole ${OPTS[0]} ${OPTS[1]} done # generate some memory stats diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml index 5b5afe746859..d8a95954fe1a 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml @@ -32,6 +32,11 @@ on: options: - "Build RPMs" - "Test repo" + patch_level: + type: string + required: false + default: "" + description: "(optional) patch level number" repo_url: type: string required: false @@ -78,7 +83,13 @@ jobs: mkdir -p /tmp/repo ssh zfs@vm0 '$HOME/zfs/.github/workflows/scripts/qemu-test-repo-vm.sh' ${{ github.event.inputs.repo_url }} else - .github/workflows/scripts/qemu-4-build.sh --repo --release --dkms --tarball ${{ matrix.os }} + EXTRA="" + if [ -n "${{ github.event.inputs.patch_level }}" ] ; then + EXTRA="--patch-level ${{ github.event.inputs.patch_level }}" + fi + + .github/workflows/scripts/qemu-4-build.sh $EXTRA \ + --repo --release --dkms --tarball ${{ matrix.os }} fi - name: Prepare artifacts diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index cda620313189..4ebb80af1f03 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -29,7 +29,7 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' + 
FULL_OS='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora41", "fedora42", "freebsd13-5r", "freebsd14-3s", "freebsd15-0c", "ubuntu22", "ubuntu24"]' QUICK_OS='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd14-3s", "ubuntu24"]' # determine CI type when running on PR ci_type="full" @@ -63,8 +63,8 @@ jobs: strategy: fail-fast: false matrix: - # rhl: almalinux8, almalinux9, centos-stream9, fedora41 - # debian: debian11, debian12, ubuntu22, ubuntu24 + # rhl: almalinux8, almalinux9, centos-stream9, fedora4x + # debian: debian12, debian13, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r diff --git a/sys/contrib/openzfs/.mailmap b/sys/contrib/openzfs/.mailmap index b6d942c000b8..e6f09c6c9d43 100644 --- a/sys/contrib/openzfs/.mailmap +++ b/sys/contrib/openzfs/.mailmap @@ -23,6 +23,7 @@ # These maps are making names consistent where they have varied but the email # address has never changed. In most cases, the full name is in the # Signed-off-by of a commit with a matching author. 
+Achill Gilgenast <achill@achill.org> Ahelenia ZiemiaÅ„ska <nabijaczleweli@gmail.com> Ahelenia ZiemiaÅ„ska <nabijaczleweli@nabijaczleweli.xyz> Alex John <alex@stty.io> @@ -37,6 +38,7 @@ Crag Wang <crag0715@gmail.com> Damian Szuberski <szuberskidamian@gmail.com> Daniel Kolesa <daniel@octaforge.org> Debabrata Banerjee <dbavatar@gmail.com> +Diwakar Kristappagari <diwakar-k@hpe.com> Finix Yan <yanchongwen@hotmail.com> Gaurav Kumar <gauravk.18@gmail.com> Gionatan Danti <g.danti@assyoma.it> @@ -145,6 +147,7 @@ Gaurav Kumar <gauravk.18@gmail.com> <gaurkuma@users.noreply.github.com> George Gaydarov <git@gg7.io> <gg7@users.noreply.github.com> Georgy Yakovlev <gyakovlev@gentoo.org> <168902+gyakovlev@users.noreply.github.com> Gerardwx <gerardw@alum.mit.edu> <Gerardwx@users.noreply.github.com> +Germano Massullo <germano.massullo@gmail.com> <Germano0@users.noreply.github.com> Gian-Carlo DeFazio <defazio1@llnl.gov> <defaziogiancarlo@users.noreply.github.com> Giuseppe Di Natale <dinatale2@llnl.gov> <dinatale2@users.noreply.github.com> Hajo Möller <dasjoe@gmail.com> <dasjoe@users.noreply.github.com> @@ -164,6 +167,7 @@ John Ramsden <johnramsden@riseup.net> <johnramsden@users.noreply.github.com> Jonathon Fernyhough <jonathon@m2x.dev> <559369+jonathonf@users.noreply.github.com> Jose Luis Duran <jlduran@gmail.com> <jlduran@users.noreply.github.com> Justin Hibbits <chmeeedalf@gmail.com> <chmeeedalf@users.noreply.github.com> +Kaitlin Hoang <kthoang@amazon.com> <khoang98@users.noreply.github.com> Kevin Greene <kevin.greene@delphix.com> <104801862+kxgreene@users.noreply.github.com> Kevin Jin <lostking2008@hotmail.com> <33590050+jxdking@users.noreply.github.com> Kevin P. 
Fleming <kevin@km6g.us> <kpfleming@users.noreply.github.com> diff --git a/sys/contrib/openzfs/AUTHORS b/sys/contrib/openzfs/AUTHORS index a9d249a66f1e..6c34c07f39ef 100644 --- a/sys/contrib/openzfs/AUTHORS +++ b/sys/contrib/openzfs/AUTHORS @@ -10,6 +10,7 @@ PAST MAINTAINERS: CONTRIBUTORS: Aaron Fineman <abyxcos@gmail.com> + Achill Gilgenast <achill@achill.org> Adam D. Moss <c@yotes.com> Adam Leventhal <ahl@delphix.com> Adam Stevko <adam.stevko@gmail.com> @@ -59,6 +60,7 @@ CONTRIBUTORS: Andreas Buschmann <andreas.buschmann@tech.net.de> Andreas Dilger <adilger@intel.com> Andreas Vögele <andreas@andreasvoegele.com> + Andres <a-d-j-i@users.noreply.github.com> Andrew Barnes <barnes333@gmail.com> Andrew Hamilton <ahamilto@tjhsst.edu> Andrew Innes <andrew.c12@gmail.com> @@ -72,6 +74,7 @@ CONTRIBUTORS: Andrey Prokopenko <job@terem.fr> Andrey Vesnovaty <andrey.vesnovaty@gmail.com> Andriy Gapon <avg@freebsd.org> + Andriy Tkachuk <andriy.tkachuk@seagate.com> Andy Bakun <github@thwartedefforts.org> Andy Fiddaman <omnios@citrus-it.co.uk> Aniruddha Shankar <k@191a.net> @@ -120,6 +123,7 @@ CONTRIBUTORS: Caleb James DeLisle <calebdelisle@lavabit.com> Cameron Harr <harr1@llnl.gov> Cao Xuewen <cao.xuewen@zte.com.cn> + Carl George <carlwgeorge@gmail.com> Carlo Landmeter <clandmeter@gmail.com> Carlos Alberto Lopez Perez <clopez@igalia.com> Cedric Maunoury <cedric.maunoury@gmail.com> @@ -200,6 +204,7 @@ CONTRIBUTORS: Dimitri John Ledkov <xnox@ubuntu.com> Dimitry Andric <dimitry@andric.com> Dirkjan Bussink <d.bussink@gmail.com> + Diwakar Kristappagari <diwakar-k@hpe.com> Dmitry Khasanov <pik4ez@gmail.com> Dominic Pearson <dsp@technoanimal.net> Dominik Hassler <hadfl@omniosce.org> @@ -250,6 +255,7 @@ CONTRIBUTORS: George Wilson <gwilson@delphix.com> Georgy Yakovlev <ya@sysdump.net> Gerardwx <gerardw@alum.mit.edu> + Germano Massullo <germano.massullo@gmail.com> Gian-Carlo DeFazio <defazio1@llnl.gov> Gionatan Danti <g.danti@assyoma.it> Giuseppe Di Natale <guss80@gmail.com> @@ -287,6 +293,7 
@@ CONTRIBUTORS: Igor K <igor@dilos.org> Igor Kozhukhov <ikozhukhov@gmail.com> Igor Lvovsky <ilvovsky@gmail.com> + Igor Ostapenko <pm@igoro.pro> ilbsmart <wgqimut@gmail.com> Ilkka Sovanto <github@ilkka.kapsi.fi> illiliti <illiliti@protonmail.com> @@ -326,6 +333,7 @@ CONTRIBUTORS: Jinshan Xiong <jinshan.xiong@intel.com> Jitendra Patidar <jitendra.patidar@nutanix.com> JK Dingwall <james@dingwall.me.uk> + Joel Low <joel@joelsplace.sg> Joe Stein <joe.stein@delphix.com> John-Mark Gurney <jmg@funkthat.com> John Albietz <inthecloud247@gmail.com> @@ -374,6 +382,7 @@ CONTRIBUTORS: Kevin Jin <lostking2008@hotmail.com> Kevin P. Fleming <kevin@km6g.us> Kevin Tanguy <kevin.tanguy@ovh.net> + khoang98 <khoang98@users.noreply.github.com> KireinaHoro <i@jsteward.moe> Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl> Kleber TarcÃsio <klebertarcisio@yahoo.com.br> @@ -447,6 +456,7 @@ CONTRIBUTORS: Max Zettlmeißl <max@zettlmeissl.de> Md Islam <mdnahian@outlook.com> megari <megari@iki.fi> + Meriel Luna Mittelbach <lunarlambda@gmail.com> Michael D Labriola <michael.d.labriola@gmail.com> Michael Franzl <michael@franzl.name> Michael Gebetsroither <michael@mgeb.org> @@ -494,6 +504,7 @@ CONTRIBUTORS: Orivej Desh <orivej@gmx.fr> Pablo Correa Gómez <ablocorrea@hotmail.com> Palash Gandhi <pbg4930@rit.edu> + Patrick Fasano <patrick@patrickfasano.com> Patrick Mooney <pmooney@pfmooney.com> Patrik Greco <sikevux@sikevux.se> Paul B. Henson <henson@acm.org> @@ -535,6 +546,7 @@ CONTRIBUTORS: Remy Blank <remy.blank@pobox.com> renelson <bnelson@nelsonbe.com> Reno Reckling <e-github@wthack.de> + René Wirnata <rene.wirnata@pandascience.net> Ricardo M. 
Correia <ricardo.correia@oracle.com> Riccardo Schirone <rschirone91@gmail.com> Richard Allen <belperite@gmail.com> @@ -640,6 +652,7 @@ CONTRIBUTORS: tleydxdy <shironeko.github@tesaguri.club> Tobin Harding <me@tobin.cc> Todd Seidelmann <seidelma@users.noreply.github.com> + Todd Zullinger <tmz@pobox.com> Tom Caputi <tcaputi@datto.com> Tom Matthews <tom@axiom-partners.com> Tomohiro Kusumi <kusumi.tomohiro@gmail.com> diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 47f0795bfa11..42f65290e4e3 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.3.99 -Release: 1 +Version: 2.4.0 +Release: rc1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.15 +Linux-Maximum: 6.16 Linux-Minimum: 4.18 diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 66d5fbd6adbe..adaa5cd10961 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -127,6 +127,7 @@ static zfs_range_tree_t *mos_refd_objs; static spa_t *spa; static objset_t *os; static boolean_t kernel_init_done; +static boolean_t corruption_found = B_FALSE; static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); @@ -250,6 +251,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle) &e->svbr_blk, B_TRUE); (void) printf("\tERROR: %d unmatched FREE(s): %s\n", e->svbr_refcnt, blkbuf); + corruption_found = B_TRUE; } zfs_btree_destroy(&sv->sv_pair); @@ -405,6 +407,7 @@ verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva), (u_longlong_t)found->svb_allocated_txg, (u_longlong_t)txg); + corruption_found = B_TRUE; } } } @@ -426,6 +429,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)txg, (u_longlong_t)offset, (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); + corruption_found = B_TRUE; } else { 
zfs_range_tree_add(mv->mv_allocated, offset, size); @@ -439,6 +443,7 @@ metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) (u_longlong_t)txg, (u_longlong_t)offset, (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, (u_longlong_t)mv->mv_msid); + corruption_found = B_TRUE; } else { zfs_range_tree_remove(mv->mv_allocated, offset, size); @@ -526,6 +531,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); + corruption_found = B_TRUE; continue; } @@ -542,6 +548,7 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); + corruption_found = B_TRUE; continue; } @@ -655,6 +662,7 @@ livelist_metaslab_validate(spa_t *spa) } (void) printf("ERROR: Found livelist blocks marked as allocated " "for indirect vdevs:\n"); + corruption_found = B_TRUE; zfs_btree_index_t *where = NULL; sublivelist_verify_block_t *svb; @@ -827,7 +835,7 @@ usage(void) (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); - zdb_exit(1); + zdb_exit(2); } static void @@ -1578,9 +1586,8 @@ dump_spacemap(objset_t *os, space_map_t *sm) continue; } - uint8_t words; char entry_type; - uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; + uint64_t entry_off, entry_run, entry_vdev; if (sm_entry_is_single_word(word)) { entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? 
@@ -1588,35 +1595,43 @@ dump_spacemap(objset_t *os, space_map_t *sm) entry_off = (SM_OFFSET_DECODE(word) << mapshift) + sm->sm_start; entry_run = SM_RUN_DECODE(word) << mapshift; - words = 1; + + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run); } else { /* it is a two-word entry so we read another word */ ASSERT(sm_entry_is_double_word(word)); uint64_t extra_word; offset += sizeof (extra_word); + ASSERT3U(offset, <, space_map_length(sm)); VERIFY0(dmu_read(os, space_map_object(sm), offset, sizeof (extra_word), &extra_word, DMU_READ_PREFETCH)); - ASSERT3U(offset, <=, space_map_length(sm)); - entry_run = SM2_RUN_DECODE(word) << mapshift; entry_vdev = SM2_VDEV_DECODE(word); entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? 'A' : 'F'; entry_off = (SM2_OFFSET_DECODE(extra_word) << mapshift) + sm->sm_start; - words = 2; - } - (void) printf("\t [%6llu] %c range:" - " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", - (u_longlong_t)entry_id, - entry_type, (u_longlong_t)entry_off, - (u_longlong_t)(entry_off + entry_run), - (u_longlong_t)entry_run, - (u_longlong_t)entry_vdev, words); + if (zopt_metaslab_args == 0 || + zopt_metaslab[0] == entry_vdev) { + (void) printf("\t [%6llu] %c " + "range: %012llx-%012llx size: %08llx " + "vdev: %llu\n", + (u_longlong_t)entry_id, entry_type, + (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run - 1), + (u_longlong_t)entry_run, + (u_longlong_t)entry_vdev); + } + } if (entry_type == 'A') alloc += entry_run; @@ -1865,7 +1880,7 @@ dump_metaslabs(spa_t *spa) (void) printf("\nMetaslabs:\n"); - if (!dump_opt['d'] && zopt_metaslab_args > 0) { + if (zopt_metaslab_args > 0) { c = zopt_metaslab[0]; if (c >= children) @@ -2583,19 +2598,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, } } -static void +static u_longlong_t 
print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp) { char blkbuf[BP_SPRINTF_LEN]; + u_longlong_t offset; int l; - if (!BP_IS_EMBEDDED(bp)) { - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - } + offset = (u_longlong_t)blkid2offset(dnp, bp, zb); - (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); + (void) printf("%16llx ", offset); ASSERT(zb->zb_level >= 0); @@ -2610,19 +2623,38 @@ print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE); if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); - (void) printf("%s\n", blkbuf); + (void) printf("%s", blkbuf); + + if (!BP_IS_EMBEDDED(bp)) { + if (BP_GET_TYPE(bp) != dnp->dn_type) { + (void) printf(" (ERROR: Block pointer type " + "(%llu) does not match dnode type (%hhu))", + BP_GET_TYPE(bp), dnp->dn_type); + corruption_found = B_TRUE; + } + if (BP_GET_LEVEL(bp) != zb->zb_level) { + (void) printf(" (ERROR: Block pointer level " + "(%llu) does not match bookmark level (%lld))", + BP_GET_LEVEL(bp), (u_longlong_t)zb->zb_level); + corruption_found = B_TRUE; + } + } + (void) printf("\n"); + + return (offset); } static int visit_indirect(spa_t *spa, const dnode_phys_t *dnp, blkptr_t *bp, const zbookmark_phys_t *zb) { + u_longlong_t offset; int err = 0; if (BP_GET_BIRTH(bp) == 0) return (0); - print_indirect(spa, bp, zb, dnp); + offset = print_indirect(spa, bp, zb, dnp); if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { arc_flags_t flags = ARC_FLAG_WAIT; @@ -2652,8 +2684,15 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, break; fill += BP_GET_FILL(cbp); } - if (!err) - ASSERT3U(fill, ==, BP_GET_FILL(bp)); + if (!err) { + if (fill != BP_GET_FILL(bp)) { + (void) printf("%16llx: Block pointer " + "fill (%llu) does not match calculated " + "value (%llu)\n", offset, BP_GET_FILL(bp), + 
(u_longlong_t)fill); + corruption_found = B_TRUE; + } + } arc_buf_destroy(buf, &buf); } @@ -2909,6 +2948,7 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) (void) printf("ERROR %u while trying to open " "subobj id %llu\n", error, (u_longlong_t)subobj); + corruption_found = B_TRUE; continue; } dump_full_bpobj(&subbpo, "subobj", indent + 1); @@ -3088,6 +3128,7 @@ bpobj_count_refd(bpobj_t *bpo) (void) printf("ERROR %u while trying to open " "subobj id %llu\n", error, (u_longlong_t)subobj); + corruption_found = B_TRUE; continue; } bpobj_count_refd(&subbpo); @@ -9634,7 +9675,7 @@ main(int argc, char **argv) } else if (objset_str && !zdb_numeric(objset_str + 1) && dump_opt['N']) { printf("Supply a numeric objset ID with -N\n"); - error = 1; + error = 2; goto fini; } } else { @@ -9936,5 +9977,8 @@ fini: if (kernel_init_done) kernel_fini(); + if (corruption_found && error == 0) + error = 3; + return (error); } diff --git a/sys/contrib/openzfs/config/kernel-mkdir.m4 b/sys/contrib/openzfs/config/kernel-mkdir.m4 index c1aebc387abe..78b32447c593 100644 --- a/sys/contrib/openzfs/config/kernel-mkdir.m4 +++ b/sys/contrib/openzfs/config/kernel-mkdir.m4 @@ -84,6 +84,8 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ AC_DEFINE(HAVE_IOPS_MKDIR_DENTRY, 1, [iops->mkdir() returns struct dentry*]) ],[ + AC_MSG_RESULT(no) + dnl # dnl # 6.3 API change dnl # mkdir() takes struct mnt_idmap * as the first arg diff --git a/sys/contrib/openzfs/config/toolchain-simd.m4 b/sys/contrib/openzfs/config/toolchain-simd.m4 index 344807fc830c..f18c91007cde 100644 --- a/sys/contrib/openzfs/config/toolchain-simd.m4 +++ b/sys/contrib/openzfs/config/toolchain-simd.m4 @@ -24,6 +24,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES + ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE 
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES @@ -447,6 +449,48 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [ ]) dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES], [ + AC_MSG_CHECKING([whether host toolchain supports VAES]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([ + [ + int main() + { + __asm__ __volatile__("vaesenc %ymm0, %ymm1, %ymm0"); + return (0); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_VAES], 1, [Define if host toolchain supports VAES]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # +dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ +dnl # +AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ], [ + AC_MSG_CHECKING([whether host toolchain supports VPCLMULQDQ]) + + AC_LINK_IFELSE([AC_LANG_SOURCE([ + [ + int main() + { + __asm__ __volatile__("vpclmulqdq %0, %%ymm4, %%ymm3, %%ymm5" :: "i"(0)); + return (0); + } + ]])], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_VPCLMULQDQ], 1, [Define if host toolchain supports VPCLMULQDQ]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +dnl # dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE dnl # AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [ diff --git a/sys/contrib/openzfs/contrib/debian/control b/sys/contrib/openzfs/contrib/debian/control index 96a2bdd88665..c5358dedc0fd 100644 --- a/sys/contrib/openzfs/contrib/debian/control +++ b/sys/contrib/openzfs/contrib/debian/control @@ -100,8 +100,8 @@ Depends: ${misc:Depends}, ${shlibs:Depends} # The libcurl4 is loaded through dlopen("libcurl.so.4"). 
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521 Recommends: libcurl4 -Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux -Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux +Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux, openzfs-libzfs4 +Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux, openzfs-libzfs4 Conflicts: libzfs6linux Description: OpenZFS filesystem library for Linux - general support OpenZFS is a storage platform that encompasses the functionality of @@ -128,8 +128,8 @@ Package: openzfs-libzpool6 Section: contrib/libs Architecture: linux-any Depends: ${misc:Depends}, ${shlibs:Depends} -Breaks: libzpool2, libzpool5, libzpool5linux, libzpool6linux -Replaces: libzpool2, libzpool5, libzpool5linux, libzpool6linux +Breaks: libzpool2, libzpool5, libzpool6linux +Replaces: libzpool2, libzpool5, libzpool6linux Conflicts: libzpool6linux Description: OpenZFS pool library for Linux OpenZFS is a storage platform that encompasses the functionality of diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in index 3226d604546c..2b0568938b25 100755 --- a/sys/contrib/openzfs/contrib/debian/rules.in +++ b/sys/contrib/openzfs/contrib/debian/rules.in @@ -93,7 +93,7 @@ override_dh_auto_install: @# Install the DKMS source. 
@# We only want the files needed to build the modules install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \ - '$(CURDIR)/scripts/dkms.postbuild' + '$(CURDIR)/scripts/dkms.postbuild' '$(CURDIR)/scripts/objtool-wrapper.in' $(foreach file,$(DKMSFILES),mv '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/$(file)' '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)' || exit 1;) @# Only ever build Linux modules @@ -108,8 +108,8 @@ override_dh_auto_install: @# - zfs.release$ @# * Takes care of spaces and tabs @# * Remove reference to ZFS_AC_PACKAGE - awk '/^AC_CONFIG_FILES\(\[/,/^\]\)/ {\ - if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))/) \ + awk '/^AC_CONFIG_FILES\(\[/,/\]\)/ {\ + if ($$0 !~ /^(AC_CONFIG_FILES\(\[([ \t]+)?$$|\]\)([ \t]+)?$$|([ \t]+)?(include\/(Makefile|sys|os\/(Makefile|linux))|module\/|Makefile([ \t]+)?$$|zfs\.release([ \t]+)?$$))|scripts\/objtool-wrapper.*\]\)$$/) \ {next} } {print}' \ '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' | sed '/ZFS_AC_PACKAGE/d' > '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/configure.ac' @# Set "SUBDIRS = module include" for CONFIG_KERNEL and remove SUBDIRS for all other configs. diff --git a/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/LICENSE b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/LICENSE new file mode 100644 index 000000000000..04c03a37e0cb --- /dev/null +++ b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/LICENSE @@ -0,0 +1,253 @@ +BoringSSL is a fork of OpenSSL. As such, large parts of it fall under OpenSSL +licensing. Files that are completely new have a Google copyright and an ISC +license. This license is reproduced at the bottom of this file. 
+ +Contributors to BoringSSL are required to follow the CLA rules for Chromium: +https://cla.developers.google.com/clas + +Files in third_party/ have their own licenses, as described therein. The MIT +license, for third_party/fiat, which, unlike other third_party directories, is +compiled into non-test libraries, is included below. + +The OpenSSL toolkit stays under a dual license, i.e. both the conditions of the +OpenSSL License and the original SSLeay license apply to the toolkit. See below +for the actual license texts. Actually both licenses are BSD-style Open Source +licenses. In case of any license issues related to OpenSSL please contact +openssl-core@openssl.org. + +The following are Google-internal bug numbers where explicit permission from +some authors is recorded for use of their work. (This is purely for our own +record keeping.) + 27287199 + 27287880 + 27287883 + 263291445 + + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). 
+ * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. 
If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + + +ISC license used for completely new code in BoringSSL: + +/* Copyright 2015 The BoringSSL Authors + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + + +The code in third_party/fiat carries the MIT license: + +Copyright (c) 2015-2016 the fiat-crypto authors (see +https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Licenses for support code +------------------------- + +Parts of the TLS test suite are under the Go license. This code is not included +in BoringSSL (i.e. libcrypto and libssl) when compiled, however, so +distributing code linked against BoringSSL does not trigger this license: + +Copyright (c) 2009 The Go Authors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +BoringSSL uses the Chromium test infrastructure to run a continuous build, +trybots etc. The scripts which manage this, and the script for generating build +metadata, are under the Chromium license. Distributing code linked against +BoringSSL does not trigger this license. + +Copyright 2015 The Chromium Authors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/README b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/README new file mode 100644 index 000000000000..aa6fb6d477fa --- /dev/null +++ b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/README @@ -0,0 +1,11 @@ +This directory contains the original BoringSSL [1] GCM x86-64 assembly +files [2]. + +The assembler files where then further modified to fit the ICP conventions. 
+ +The main purpose to include these files (and the original ones) here, is to +serve as a reference if upstream changes need to be applied to the files +included and modified in the ICP. + +[1] https://github.com/google/boringssl +[2] https://github.com/google/boringssl/blob/d5440dd2c2c500ac2d3bba4afec47a054b4d99ae/gen/bcm/aes-gcm-avx2-x86_64-linux.S diff --git a/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/aes-gcm-avx2-x86_64-linux.S b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/aes-gcm-avx2-x86_64-linux.S new file mode 100644 index 000000000000..e7327c9de872 --- /dev/null +++ b/sys/contrib/openzfs/contrib/icp/gcm-simd/boringssl/aes-gcm-avx2-x86_64-linux.S @@ -0,0 +1,1328 @@ +// SPDX-License-Identifier: Apache-2.0 +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#include <openssl/asm_base.h> + +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) +.section .rodata +.align 16 + + +.Lbswap_mask: +.quad 0x08090a0b0c0d0e0f, 0x0001020304050607 + + + + + + + + +.Lgfpoly: +.quad 1, 0xc200000000000000 + + +.Lgfpoly_and_internal_carrybit: +.quad 1, 0xc200000000000001 + +.align 32 + +.Lctr_pattern: +.quad 0, 0 +.quad 1, 0 +.Linc_2blocks: +.quad 2, 0 +.quad 2, 0 + +.text +.globl gcm_init_vpclmulqdq_avx2 +.hidden gcm_init_vpclmulqdq_avx2 +.type gcm_init_vpclmulqdq_avx2,@function +.align 32 +gcm_init_vpclmulqdq_avx2: +.cfi_startproc + +_CET_ENDBR + + + + + + vpshufd $0x4e,(%rsi),%xmm3 + + + + + + vpshufd $0xd3,%xmm3,%xmm0 + vpsrad $31,%xmm0,%xmm0 + vpaddq %xmm3,%xmm3,%xmm3 + vpand .Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + + vbroadcasti128 .Lgfpoly(%rip),%ymm6 + + + vpclmulqdq $0x00,%xmm3,%xmm3,%xmm0 + vpclmulqdq $0x01,%xmm3,%xmm3,%xmm1 + vpclmulqdq $0x10,%xmm3,%xmm3,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm0,%xmm6,%xmm2 + vpshufd $0x4e,%xmm0,%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm1,%xmm1 + vpclmulqdq 
$0x11,%xmm3,%xmm3,%xmm5 + vpclmulqdq $0x01,%xmm1,%xmm6,%xmm0 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm5,%xmm5 + vpxor %xmm0,%xmm5,%xmm5 + + + + vinserti128 $1,%xmm3,%ymm5,%ymm3 + vinserti128 $1,%xmm5,%ymm5,%ymm5 + + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + + + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,64(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128+32(%rdi) + + + vpclmulqdq $0x00,%ymm5,%ymm4,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm4,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm4,%ymm3 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm3,%ymm3 + vpxor %ymm0,%ymm3,%ymm3 + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,0(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128(%rdi) + + vzeroupper + ret + +.cfi_endproc +.size gcm_init_vpclmulqdq_avx2, . 
- gcm_init_vpclmulqdq_avx2 +.globl gcm_gmult_vpclmulqdq_avx2 +.hidden gcm_gmult_vpclmulqdq_avx2 +.type gcm_gmult_vpclmulqdq_avx2,@function +.align 32 +gcm_gmult_vpclmulqdq_avx2: +.cfi_startproc + +_CET_ENDBR + + + + vmovdqu (%rdi),%xmm0 + vmovdqu .Lbswap_mask(%rip),%xmm1 + vmovdqu 128-16(%rsi),%xmm2 + vmovdqu .Lgfpoly(%rip),%xmm3 + vpshufb %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm4 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm5 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm4,%xmm3,%xmm6 + vpshufd $0x4e,%xmm4,%xmm4 + vpxor %xmm4,%xmm5,%xmm5 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x01,%xmm5,%xmm3,%xmm4 + vpshufd $0x4e,%xmm5,%xmm5 + vpxor %xmm5,%xmm0,%xmm0 + vpxor %xmm4,%xmm0,%xmm0 + + + vpshufb %xmm1,%xmm0,%xmm0 + vmovdqu %xmm0,(%rdi) + ret + +.cfi_endproc +.size gcm_gmult_vpclmulqdq_avx2, . - gcm_gmult_vpclmulqdq_avx2 +.globl gcm_ghash_vpclmulqdq_avx2 +.hidden gcm_ghash_vpclmulqdq_avx2 +.type gcm_ghash_vpclmulqdq_avx2,@function +.align 32 +gcm_ghash_vpclmulqdq_avx2: +.cfi_startproc + +_CET_ENDBR + + + + + + + vmovdqu .Lbswap_mask(%rip),%xmm6 + vmovdqu .Lgfpoly(%rip),%xmm7 + + + vmovdqu (%rdi),%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + + + cmpq $32,%rcx + jb .Lghash_lastblock + + + + vinserti128 $1,%xmm6,%ymm6,%ymm6 + vinserti128 $1,%xmm7,%ymm7,%ymm7 + + cmpq $127,%rcx + jbe .Lghash_loop_1x + + + vmovdqu 128(%rsi),%ymm8 + vmovdqu 128+32(%rsi),%ymm9 +.Lghash_loop_4x: + + vmovdqu 0(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 0(%rsi),%ymm2 + vpxor %ymm5,%ymm1,%ymm1 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm8,%ymm0,%ymm4 + + vmovdqu 32(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 32(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 
+ vpclmulqdq $0x10,%ymm8,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu 64(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + + vmovdqu 96(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 96(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm5,%ymm4,%ymm4 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm3,%ymm2,%ymm0 + vpshufd $0x4e,%ymm3,%ymm3 + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vpclmulqdq $0x01,%ymm4,%ymm2,%ymm0 + vpshufd $0x4e,%ymm4,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm0,%ymm5,%ymm5 + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + + subq $-128,%rdx + addq $-128,%rcx + cmpq $127,%rcx + ja .Lghash_loop_4x + + + cmpq $32,%rcx + jb .Lghash_loop_1x_done +.Lghash_loop_1x: + vmovdqu (%rdx),%ymm0 + vpshufb %ymm6,%ymm0,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vmovdqu 128-32(%rsi),%ymm0 + vpclmulqdq $0x00,%ymm0,%ymm5,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm5,%ymm2 + vpclmulqdq $0x10,%ymm0,%ymm5,%ymm3 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x01,%ymm1,%ymm7,%ymm3 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm2,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x11,%ymm0,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm7,%ymm1 + vpshufd $0x4e,%ymm2,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm1,%ymm5,%ymm5 + + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + addq $32,%rdx + subq $32,%rcx + cmpq $32,%rcx + jae .Lghash_loop_1x +.Lghash_loop_1x_done: + + +.Lghash_lastblock: + testq %rcx,%rcx + jz .Lghash_done + vmovdqu (%rdx),%xmm0 + 
vpshufb %xmm6,%xmm0,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 128-16(%rsi),%xmm0 + vpclmulqdq $0x00,%xmm0,%xmm5,%xmm1 + vpclmulqdq $0x01,%xmm0,%xmm5,%xmm2 + vpclmulqdq $0x10,%xmm0,%xmm5,%xmm3 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x01,%xmm1,%xmm7,%xmm3 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm2,%xmm2 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm2,%xmm7,%xmm1 + vpshufd $0x4e,%xmm2,%xmm2 + vpxor %xmm2,%xmm5,%xmm5 + vpxor %xmm1,%xmm5,%xmm5 + + +.Lghash_done: + + vpshufb %xmm6,%xmm5,%xmm5 + vmovdqu %xmm5,(%rdi) + + vzeroupper + ret + +.cfi_endproc +.size gcm_ghash_vpclmulqdq_avx2, . - gcm_ghash_vpclmulqdq_avx2 +.globl aes_gcm_enc_update_vaes_avx2 +.hidden aes_gcm_enc_update_vaes_avx2 +.type aes_gcm_enc_update_vaes_avx2,@function +.align 32 +aes_gcm_enc_update_vaes_avx2: +.cfi_startproc + +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+8(%rip) +#endif + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 240(%rcx),%r10d + leal -20(,%r10,4),%r10d + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func1 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + leaq 16(%rcx),%rax 
+.Lvaesenc_loop_first_4_vecs__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_first_4_vecs__func1 + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + addq $-128,%rdx + cmpq $127,%rdx + jbe .Lghash_last_ciphertext_4x__func1 +.align 16 +.Lcrypt_loop_4x__func1: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func1 + je .Laes192__func1 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func1: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func1: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + 
vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + 
vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + subq $-128,%rsi + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func1 +.Lghash_last_ciphertext_4x__func1: + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor 
%ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + subq $-128,%rsi +.Lcrypt_loop_4x_done__func1: + + testq %rdx,%rdx + jz .Ldone__func1 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func1 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu 
%ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %ymm0,%ymm13,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func1 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func1: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func1 + je .Lxor_two_blocks__func1 + +.Lxor_three_blocks__func1: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_two_blocks__func1: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu 
%ymm12,(%rsi) + vpshufb %ymm0,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_one_block__func1: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func1: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func1: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func1: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret + +.cfi_endproc +.size aes_gcm_enc_update_vaes_avx2, . 
- aes_gcm_enc_update_vaes_avx2 +.globl aes_gcm_dec_update_vaes_avx2 +.hidden aes_gcm_dec_update_vaes_avx2 +.type aes_gcm_dec_update_vaes_avx2,@function +.align 32 +aes_gcm_dec_update_vaes_avx2: +.cfi_startproc + +_CET_ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 240(%rcx),%r10d + leal -20(,%r10,4),%r10d + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func2 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 +.align 16 +.Lcrypt_loop_4x__func2: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func2 + je .Laes192__func2 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func2: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func2: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 
0(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor 
%ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + subq $-128,%rsi + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func2 +.Lcrypt_loop_4x_done__func2: + + testq %rdx,%rdx + jz .Ldone__func2 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func2 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax 
+.Lvaesenc_loop_tail_1__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %ymm0,%ymm3,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func2 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func2: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func2 + je .Lxor_two_blocks__func2 + +.Lxor_three_blocks__func2: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %xmm0,%xmm3,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq 
$0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_two_blocks__func2: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm2,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_one_block__func2: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm2,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func2: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func2: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func2: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + ret + +.cfi_endproc +.size aes_gcm_dec_update_vaes_avx2, . 
- aes_gcm_dec_update_vaes_avx2 +#endif diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index c569b2528368..67707e9d80f4 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -979,7 +979,8 @@ mountroot() touch /run/zfs_unlock_complete if [ -e /run/zfs_unlock_complete_notify ]; then - read -r < /run/zfs_unlock_complete_notify + # shellcheck disable=SC2034 + read -r zfs_unlock_complete_notify < /run/zfs_unlock_complete_notify fi # ------------ diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h index e8004e18c4a4..326f471d7c9b 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_x86.h @@ -598,6 +598,32 @@ zfs_movbe_available(void) } /* + * Check if VAES instruction set is available + */ +static inline boolean_t +zfs_vaes_available(void) +{ +#if defined(X86_FEATURE_VAES) + return (!!boot_cpu_has(X86_FEATURE_VAES)); +#else + return (B_FALSE); +#endif +} + +/* + * Check if VPCLMULQDQ instruction set is available + */ +static inline boolean_t +zfs_vpclmulqdq_available(void) +{ +#if defined(X86_FEATURE_VPCLMULQDQ) + return (!!boot_cpu_has(X86_FEATURE_VPCLMULQDQ)); +#else + return (B_FALSE); +#endif +} + +/* * Check if SHA_NI instruction set is available */ static inline boolean_t diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h index 955462c85d10..e34ea46b3fe8 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h @@ -139,18 +139,18 @@ #define ZCW_TP_STRUCT_ENTRY \ __field(lwb_t *, zcw_lwb) \ __field(boolean_t, zcw_done) \ - __field(int, zcw_zio_error) \ + __field(int, zcw_error) \ #define ZCW_TP_FAST_ASSIGN \ 
__entry->zcw_lwb = zcw->zcw_lwb; \ __entry->zcw_done = zcw->zcw_done; \ - __entry->zcw_zio_error = zcw->zcw_zio_error; + __entry->zcw_error = zcw->zcw_error; #define ZCW_TP_PRINTK_FMT \ "zcw { lwb %p done %u error %u }" #define ZCW_TP_PRINTK_ARGS \ - __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_zio_error + __entry->zcw_lwb, __entry->zcw_done, __entry->zcw_error /* * Generic support for two argument tracepoints of the form: diff --git a/sys/contrib/openzfs/include/sys/dmu_impl.h b/sys/contrib/openzfs/include/sys/dmu_impl.h index 21a8b16a3ee6..bae872bd1907 100644 --- a/sys/contrib/openzfs/include/sys/dmu_impl.h +++ b/sys/contrib/openzfs/include/sys/dmu_impl.h @@ -168,12 +168,10 @@ extern "C" { * dn_allocated_txg * dn_free_txg * dn_assigned_txg - * dn_dirty_txg + * dn_dirtycnt * dd_assigned_tx * dn_notxholds * dn_nodnholds - * dn_dirtyctx - * dn_dirtyctx_firstset * (dn_phys copy fields?) * (dn_phys contents?) * held from: diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h index 76218c8b09ca..8bd1db5b7165 100644 --- a/sys/contrib/openzfs/include/sys/dnode.h +++ b/sys/contrib/openzfs/include/sys/dnode.h @@ -141,12 +141,6 @@ struct dmu_buf_impl; struct objset; struct zio; -enum dnode_dirtycontext { - DN_UNDIRTIED, - DN_DIRTY_OPEN, - DN_DIRTY_SYNC -}; - /* Is dn_used in bytes? 
if not, it's in multiples of SPA_MINBLOCKSIZE */ #define DNODE_FLAG_USED_BYTES (1 << 0) #define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1) @@ -340,11 +334,9 @@ struct dnode { uint64_t dn_allocated_txg; uint64_t dn_free_txg; uint64_t dn_assigned_txg; - uint64_t dn_dirty_txg; /* txg dnode was last dirtied */ + uint8_t dn_dirtycnt; kcondvar_t dn_notxholds; kcondvar_t dn_nodnholds; - enum dnode_dirtycontext dn_dirtyctx; - const void *dn_dirtyctx_firstset; /* dbg: contents meaningless */ /* protected by own devices */ zfs_refcount_t dn_tx_holds; @@ -440,7 +432,6 @@ void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting); int dnode_try_claim(objset_t *os, uint64_t object, int slots); boolean_t dnode_is_dirty(dnode_t *dn); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); -void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); @@ -468,9 +459,6 @@ void dnode_free_interior_slots(dnode_t *dn); void dnode_set_storage_type(dnode_t *dn, dmu_object_type_t type); -#define DNODE_IS_DIRTY(_dn) \ - ((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa)) - #define DNODE_LEVEL_IS_CACHEABLE(_dn, _level) \ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ (((_level) > 0 || DMU_OT_IS_METADATA((_dn)->dn_type)) && \ diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index db6de332ae67..66db16b33c51 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -880,7 +880,6 @@ extern kcondvar_t spa_namespace_cv; #define SPA_CONFIG_UPDATE_VDEVS 1 extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t); -extern void spa_config_load(void); extern int spa_all_configs(uint64_t *generation, nvlist_t **pools); extern void spa_config_set(spa_t *spa, nvlist_t *config); 
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, @@ -1244,7 +1243,6 @@ extern void vdev_mirror_stat_fini(void); /* Initialization and termination */ extern void spa_init(spa_mode_t mode); extern void spa_fini(void); -extern void spa_boot_init(void *); /* properties */ extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); diff --git a/sys/contrib/openzfs/include/sys/zil_impl.h b/sys/contrib/openzfs/include/sys/zil_impl.h index 44b776e16b52..ea1364a7e35a 100644 --- a/sys/contrib/openzfs/include/sys/zil_impl.h +++ b/sys/contrib/openzfs/include/sys/zil_impl.h @@ -41,8 +41,8 @@ extern "C" { * * An lwb will start out in the "new" state, and transition to the "opened" * state via a call to zil_lwb_write_open() on first itx assignment. When - * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be - * held. + * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" and + * LWB's "lwb_lock" must be held. * * After the lwb is "opened", it can be assigned number of itxs and transition * into the "closed" state via zil_lwb_write_close() when full or on timeout. @@ -100,16 +100,22 @@ typedef enum { * holding the "zl_issuer_lock". After the lwb is issued, the zilog's * "zl_lock" is used to protect the lwb against concurrent access. 
*/ +typedef enum { + LWB_FLAG_SLIM = (1<<0), /* log block has slim format */ + LWB_FLAG_SLOG = (1<<1), /* lwb_blk is on SLOG device */ + LWB_FLAG_CRASHED = (1<<2), /* lwb is on the crash list */ +} lwb_flag_t; + typedef struct lwb { zilog_t *lwb_zilog; /* back pointer to log struct */ blkptr_t lwb_blk; /* on disk address of this log blk */ - boolean_t lwb_slim; /* log block has slim format */ - boolean_t lwb_slog; /* lwb_blk is on SLOG device */ + lwb_flag_t lwb_flags; /* extra info about this lwb */ int lwb_error; /* log block allocation error */ int lwb_nmax; /* max bytes in the buffer */ int lwb_nused; /* # used bytes in buffer */ int lwb_nfilled; /* # filled bytes in buffer */ int lwb_sz; /* size of block and buffer */ + int lwb_min_sz; /* min size for range allocation */ lwb_state_t lwb_state; /* the state of this lwb */ char *lwb_buf; /* log write buffer */ zio_t *lwb_child_zio; /* parent zio for children */ @@ -124,7 +130,7 @@ typedef struct lwb { list_t lwb_itxs; /* list of itx's */ list_t lwb_waiters; /* list of zil_commit_waiter's */ avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */ - kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */ + kmutex_t lwb_lock; /* protects lwb_vdev_tree and size */ } lwb_t; /* @@ -149,7 +155,7 @@ typedef struct zil_commit_waiter { list_node_t zcw_node; /* linkage in lwb_t:lwb_waiter list */ lwb_t *zcw_lwb; /* back pointer to lwb when linked */ boolean_t zcw_done; /* B_TRUE when "done", else B_FALSE */ - int zcw_zio_error; /* contains the zio io_error value */ + int zcw_error; /* result to return from zil_commit() */ } zil_commit_waiter_t; /* diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index 4f46eab3db89..353805fcb969 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -360,26 +360,26 @@ struct zbookmark_err_phys { (zb)->zb_blkid == ZB_ROOT_BLKID) typedef struct zio_prop { - enum zio_checksum zp_checksum; - enum 
zio_compress zp_compress; + enum zio_checksum zp_checksum:8; + enum zio_compress zp_compress:8; uint8_t zp_complevel; uint8_t zp_level; uint8_t zp_copies; uint8_t zp_gang_copies; - dmu_object_type_t zp_type; - boolean_t zp_dedup; - boolean_t zp_dedup_verify; - boolean_t zp_nopwrite; - boolean_t zp_brtwrite; - boolean_t zp_encrypt; - boolean_t zp_byteorder; - boolean_t zp_direct_write; - boolean_t zp_rewrite; + dmu_object_type_t zp_type:8; + dmu_object_type_t zp_storage_type:8; + boolean_t zp_dedup:1; + boolean_t zp_dedup_verify:1; + boolean_t zp_nopwrite:1; + boolean_t zp_brtwrite:1; + boolean_t zp_encrypt:1; + boolean_t zp_byteorder:1; + boolean_t zp_direct_write:1; + boolean_t zp_rewrite:1; + uint32_t zp_zpl_smallblk; uint8_t zp_salt[ZIO_DATA_SALT_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; - uint32_t zp_zpl_smallblk; - dmu_object_type_t zp_storage_type; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; @@ -622,7 +622,8 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio_flag_t flags); extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, - blkptr_t *new_bp, uint64_t size, boolean_t *slog); + blkptr_t *new_bp, uint64_t min_size, uint64_t max_size, boolean_t *slog, + boolean_t allow_larger); extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); diff --git a/sys/contrib/openzfs/include/sys/zvol_impl.h b/sys/contrib/openzfs/include/sys/zvol_impl.h index f3dd9f26f23c..5422e66832c0 100644 --- a/sys/contrib/openzfs/include/sys/zvol_impl.h +++ b/sys/contrib/openzfs/include/sys/zvol_impl.h @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. 
*/ #ifndef _SYS_ZVOL_IMPL_H @@ -56,6 +56,7 @@ typedef struct zvol_state { atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ kcondvar_t zv_removing_cv; /* ready to remove minor */ + list_node_t zv_remove_node; /* node on removal list */ struct zvol_state_os *zv_zso; /* private platform state */ boolean_t zv_threading; /* volthreading property */ } zvol_state_t; @@ -135,7 +136,7 @@ int zvol_os_rename_minor(zvol_state_t *zv, const char *newname); int zvol_os_create_minor(const char *name); int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize); boolean_t zvol_os_is_zvol(const char *path); -void zvol_os_clear_private(zvol_state_t *zv); +void zvol_os_remove_minor(zvol_state_t *zv); void zvol_os_set_disk_ro(zvol_state_t *zv, int flags); void zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity); diff --git a/sys/contrib/openzfs/lib/libicp/Makefile.am b/sys/contrib/openzfs/lib/libicp/Makefile.am index ce24d13a760f..23adba10bc44 100644 --- a/sys/contrib/openzfs/lib/libicp/Makefile.am +++ b/sys/contrib/openzfs/lib/libicp/Makefile.am @@ -69,6 +69,7 @@ nodist_libicp_la_SOURCES += \ module/icp/asm-x86_64/aes/aes_aesni.S \ module/icp/asm-x86_64/modes/gcm_pclmulqdq.S \ module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S \ + module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S \ module/icp/asm-x86_64/modes/ghash-x86_64.S \ module/icp/asm-x86_64/sha2/sha256-x86_64.S \ module/icp/asm-x86_64/sha2/sha512-x86_64.S \ diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/simd.h b/sys/contrib/openzfs/lib/libspl/include/sys/simd.h index 1ef24f5a7d39..4772a5416b2e 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/simd.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/simd.h @@ -102,7 +102,9 @@ typedef enum cpuid_inst_sets { AES, PCLMULQDQ, MOVBE, - SHA_NI + SHA_NI, + VAES, + VPCLMULQDQ } cpuid_inst_sets_t; /* @@ -127,6 +129,8 @@ typedef struct cpuid_feature_desc { #define _AES_BIT (1U << 25) #define _PCLMULQDQ_BIT (1U << 1) 
#define _MOVBE_BIT (1U << 22) +#define _VAES_BIT (1U << 9) +#define _VPCLMULQDQ_BIT (1U << 10) #define _SHA_NI_BIT (1U << 29) /* @@ -157,6 +161,8 @@ static const cpuid_feature_desc_t cpuid_features[] = { [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX }, [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX }, [SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX }, + [VAES] = {7U, 0U, _VAES_BIT, ECX }, + [VPCLMULQDQ] = {7U, 0U, _VPCLMULQDQ_BIT, ECX }, }; /* @@ -231,6 +237,8 @@ CPUID_FEATURE_CHECK(aes, AES); CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ); CPUID_FEATURE_CHECK(movbe, MOVBE); CPUID_FEATURE_CHECK(shani, SHA_NI); +CPUID_FEATURE_CHECK(vaes, VAES); +CPUID_FEATURE_CHECK(vpclmulqdq, VPCLMULQDQ); /* * Detect register set support @@ -382,6 +390,24 @@ zfs_shani_available(void) } /* + * Check if VAES instruction is available + */ +static inline boolean_t +zfs_vaes_available(void) +{ + return (__cpuid_has_vaes()); +} + +/* + * Check if VPCLMULQDQ instruction is available + */ +static inline boolean_t +zfs_vpclmulqdq_available(void) +{ + return (__cpuid_has_vpclmulqdq()); +} + +/* * AVX-512 family of instruction sets: * * AVX512F Foundation diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index e63153a03370..fea2f81458f9 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -38,6 +38,7 @@ #include <sys/processor.h> #include <sys/rrwlock.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/stat.h> #include <sys/systeminfo.h> #include <sys/time.h> @@ -811,6 +812,79 @@ umem_out_of_memory(void) return (0); } +static void +spa_config_load(void) +{ + void *buf = NULL; + nvlist_t *nvlist, *child; + nvpair_t *nvpair; + char *pathname; + zfs_file_t *fp; + zfs_file_attr_t zfa; + uint64_t fsize; + int err; + + /* + * Open the configuration file. 
+ */ + pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); + + err = zfs_file_open(pathname, O_RDONLY, 0, &fp); + if (err) + err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp); + + kmem_free(pathname, MAXPATHLEN); + + if (err) + return; + + if (zfs_file_getattr(fp, &zfa)) + goto out; + + fsize = zfa.zfa_size; + buf = kmem_alloc(fsize, KM_SLEEP); + + /* + * Read the nvlist from the file. + */ + if (zfs_file_read(fp, buf, fsize, NULL) < 0) + goto out; + + /* + * Unpack the nvlist. + */ + if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) + goto out; + + /* + * Iterate over all elements in the nvlist, creating a new spa_t for + * each one with the specified configuration. + */ + mutex_enter(&spa_namespace_lock); + nvpair = NULL; + while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { + if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) + continue; + + child = fnvpair_value_nvlist(nvpair); + + if (spa_lookup(nvpair_name(nvpair)) != NULL) + continue; + (void) spa_add(nvpair_name(nvpair), child, NULL); + } + mutex_exit(&spa_namespace_lock); + + nvlist_free(nvlist); + +out: + if (buf != NULL) + kmem_free(buf, fsize); + + zfs_file_close(fp); +} + void kernel_init(int mode) { @@ -835,6 +909,7 @@ kernel_init(int mode) zstd_init(); spa_init((spa_mode_t)mode); + spa_config_load(); fletcher_4_init(); diff --git a/sys/contrib/openzfs/lib/libzutil/zutil_import.c b/sys/contrib/openzfs/lib/libzutil/zutil_import.c index a4a6e76a1d09..08367f4c064d 100644 --- a/sys/contrib/openzfs/lib/libzutil/zutil_import.c +++ b/sys/contrib/openzfs/lib/libzutil/zutil_import.c @@ -1903,30 +1903,43 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp, *sepp = '\0'; pools = zpool_search_import(hdl, args); + if (pools == NULL) { + zutil_error_aux(hdl, dgettext(TEXT_DOMAIN, "no pools found")); + (void) zutil_error_fmt(hdl, LPC_UNKNOWN, dgettext(TEXT_DOMAIN, + "failed to find config for pool '%s'"), 
targetdup); + free(targetdup); + return (ENOENT); + } - if (pools != NULL) { - nvpair_t *elem = NULL; - while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { - VERIFY0(nvpair_value_nvlist(elem, &config)); - if (pool_match(config, targetdup)) { - count++; - if (match != NULL) { - /* multiple matches found */ - continue; - } else { - match = fnvlist_dup(config); - } + nvpair_t *elem = NULL; + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { + VERIFY0(nvpair_value_nvlist(elem, &config)); + if (pool_match(config, targetdup)) { + count++; + if (match != NULL) { + /* multiple matches found */ + continue; + } else { + match = fnvlist_dup(config); } } - fnvlist_free(pools); } + fnvlist_free(pools); if (count == 0) { + zutil_error_aux(hdl, dgettext(TEXT_DOMAIN, + "no matching pools")); + (void) zutil_error_fmt(hdl, LPC_UNKNOWN, dgettext(TEXT_DOMAIN, + "failed to find config for pool '%s'"), targetdup); free(targetdup); return (ENOENT); } if (count > 1) { + zutil_error_aux(hdl, dgettext(TEXT_DOMAIN, + "more than one matching pool")); + (void) zutil_error_fmt(hdl, LPC_UNKNOWN, dgettext(TEXT_DOMAIN, + "failed to find config for pool '%s'"), targetdup); free(targetdup); fnvlist_free(match); return (EINVAL); diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 4a5f9fd93f4f..5c7958667f92 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -941,10 +941,6 @@ The target number of bytes the ARC should leave as free memory on the system. If zero, equivalent to the bigger of .Sy 512 KiB No and Sy all_system_memory/64 . . -.It Sy zfs_autoimport_disable Ns = Ns Sy 1 Ns | Ns 0 Pq int -Disable pool import at module load by ignoring the cache file -.Pq Sy spa_config_path . -. .It Sy zfs_checksum_events_per_second Ns = Ns Sy 20 Ns /s Pq uint Rate limit checksum events to this many per second. 
Note that this should not be set below the ZED thresholds diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index 3984aaac5866..0a5b6af73fdb 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -15,7 +15,7 @@ .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC. .\" Copyright (c) 2017 Intel Corporation. .\" -.Dd October 27, 2024 +.Dd April 23, 2025 .Dt ZDB 8 .Os . @@ -531,6 +531,18 @@ option, with more occurrences enabling more verbosity. If no options are specified, all information about the named pool will be displayed at default verbosity. . +.Sh EXIT STATUS +The +.Nm +utility exits +.Sy 0 +on success, +.Sy 1 +if a fatal error occurs, +.Sy 2 +if invalid command line options were specified, or +.Sy 3 +if on-disk corruption was detected, but was not fatal. .Sh EXAMPLES .Ss Example 1 : No Display the configuration of imported pool Ar rpool .Bd -literal diff --git a/sys/contrib/openzfs/man/man8/zfs-send.8 b/sys/contrib/openzfs/man/man8/zfs-send.8 index c920a5a48798..f7c6b840303c 100644 --- a/sys/contrib/openzfs/man/man8/zfs-send.8 +++ b/sys/contrib/openzfs/man/man8/zfs-send.8 @@ -173,8 +173,10 @@ The receiving system must have the feature enabled. If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. Datasets that are sent with this flag may not be received as an encrypted dataset, since encrypted datasets cannot use the .Sy embedded_data @@ -201,8 +203,10 @@ property for details .Pc . If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. 
+or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. If the .Sy large_blocks feature is enabled on the sending system but the @@ -357,8 +361,10 @@ property for details .Pc . If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. If the .Sy large_blocks feature is enabled on the sending system but the @@ -400,8 +406,10 @@ The receiving system must have the feature enabled. If the .Sy lz4_compress -feature is active on the sending system, then the receiving system must have -that feature enabled as well. +or +.Sy zstd_compress +features are active on the sending system, then the receiving system must have +the corresponding features enabled as well. Datasets that are sent with this flag may not be received as an encrypted dataset, since encrypted datasets cannot use the diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index 3d6f288fa5da..362d2295e091 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -135,6 +135,7 @@ ICP_OBJS_X86_64 := \ asm-x86_64/sha2/sha256-x86_64.o \ asm-x86_64/sha2/sha512-x86_64.o \ asm-x86_64/modes/aesni-gcm-x86_64.o \ + asm-x86_64/modes/aesni-gcm-avx2-vaes.o \ asm-x86_64/modes/gcm_pclmulqdq.o \ asm-x86_64/modes/ghash-x86_64.o diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c index c2a982b5a376..3cfa5b8165ce 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c @@ -46,6 +46,9 @@ #define IMPL_CYCLE (UINT32_MAX-1) #ifdef CAN_USE_GCM_ASM #define IMPL_AVX (UINT32_MAX-2) +#if CAN_USE_GCM_ASM >= 2 +#define IMPL_AVX2 (UINT32_MAX-3) 
+#endif #endif #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_gcm_impl = IMPL_FASTEST; @@ -56,17 +59,16 @@ static uint32_t user_sel_impl = IMPL_FASTEST; boolean_t gcm_avx_can_use_movbe = B_FALSE; /* * Whether to use the optimized openssl gcm and ghash implementations. - * Set to true if module parameter icp_gcm_impl == "avx". */ -static boolean_t gcm_use_avx = B_FALSE; -#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) +static gcm_impl gcm_impl_used = GCM_IMPL_GENERIC; +#define GCM_IMPL_USED (*(volatile gcm_impl *)&gcm_impl_used) extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *); static inline boolean_t gcm_avx_will_work(void); -static inline void gcm_set_avx(boolean_t); -static inline boolean_t gcm_toggle_avx(void); -static inline size_t gcm_simd_get_htab_size(boolean_t); +static inline boolean_t gcm_avx2_will_work(void); +static inline void gcm_use_impl(gcm_impl impl); +static inline gcm_impl gcm_toggle_impl(void); static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t); @@ -89,7 +91,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM - if (ctx->gcm_use_avx == B_TRUE) + if (ctx->impl != GCM_IMPL_GENERIC) return (gcm_mode_encrypt_contiguous_blocks_avx( ctx, data, length, out, block_size)); #endif @@ -208,7 +210,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, { (void) copy_block; #ifdef CAN_USE_GCM_ASM - if (ctx->gcm_use_avx == B_TRUE) + if (ctx->impl != GCM_IMPL_GENERIC) return (gcm_encrypt_final_avx(ctx, out, block_size)); #endif @@ -374,7 +376,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM - if (ctx->gcm_use_avx == B_TRUE) + if (ctx->impl != GCM_IMPL_GENERIC) return (gcm_decrypt_final_avx(ctx, out, block_size)); #endif @@ -631,23 
+633,23 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap; if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { - gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; + gcm_ctx->impl = GCM_IMPL_USED; } else { /* - * Handle the "cycle" implementation by creating avx and - * non-avx contexts alternately. + * Handle the "cycle" implementation by creating different + * contexts, one per implementation. */ - gcm_ctx->gcm_use_avx = gcm_toggle_avx(); + gcm_ctx->impl = gcm_toggle_impl(); - /* The avx impl. doesn't handle byte swapped key schedules. */ - if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) { - gcm_ctx->gcm_use_avx = B_FALSE; + /* The AVX impl. doesn't handle byte swapped key schedules. */ + if (needs_bswap == B_TRUE) { + gcm_ctx->impl = GCM_IMPL_GENERIC; } /* - * If this is a GCM context, use the MOVBE and the BSWAP + * If this is an AVX context, use the MOVBE and the BSWAP * variants alternately. */ - if (gcm_ctx->gcm_use_avx == B_TRUE && + if (gcm_ctx->impl == GCM_IMPL_AVX && zfs_movbe_available() == B_TRUE) { (void) atomic_toggle_boolean_nv( (volatile boolean_t *)&gcm_avx_can_use_movbe); @@ -658,12 +660,13 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, * still they could be created by the aes generic implementation. * Make sure not to use them since we'll corrupt data if we do. 
*/ - if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) { - gcm_ctx->gcm_use_avx = B_FALSE; + if (gcm_ctx->impl != GCM_IMPL_GENERIC && needs_bswap == B_TRUE) { + gcm_ctx->impl = GCM_IMPL_GENERIC; cmn_err_once(CE_WARN, "ICP: Can't use the aes generic or cycle implementations " - "in combination with the gcm avx implementation!"); + "in combination with the gcm avx or avx2-vaes " + "implementation!"); cmn_err_once(CE_WARN, "ICP: Falling back to a compatible implementation, " "aes-gcm performance will likely be degraded."); @@ -672,36 +675,20 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, "restore performance."); } - /* Allocate Htab memory as needed. */ - if (gcm_ctx->gcm_use_avx == B_TRUE) { - size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); - - if (htab_len == 0) { - return (CRYPTO_MECHANISM_PARAM_INVALID); - } - gcm_ctx->gcm_htab_len = htab_len; - gcm_ctx->gcm_Htable = - kmem_alloc(htab_len, KM_SLEEP); - - if (gcm_ctx->gcm_Htable == NULL) { - return (CRYPTO_HOST_MEMORY); - } + /* + * AVX implementations use Htable with sizes depending on + * implementation. + */ + if (gcm_ctx->impl != GCM_IMPL_GENERIC) { + rv = gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len, + block_size); } - /* Avx and non avx context initialization differs from here on. 
*/ - if (gcm_ctx->gcm_use_avx == B_FALSE) { + else #endif /* ifdef CAN_USE_GCM_ASM */ - if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size, - encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - } -#ifdef CAN_USE_GCM_ASM - } else { - if (gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len, - block_size) != CRYPTO_SUCCESS) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - } + if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size, + encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; } -#endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } @@ -767,6 +754,9 @@ gcm_impl_get_ops(void) break; #ifdef CAN_USE_GCM_ASM case IMPL_AVX: +#if CAN_USE_GCM_ASM >= 2 + case IMPL_AVX2: +#endif /* * Make sure that we return a valid implementation while * switching to the avx implementation since there still @@ -828,6 +818,13 @@ gcm_impl_init(void) * Use the avx implementation if it's available and the implementation * hasn't changed from its default value of fastest on module load. */ +#if CAN_USE_GCM_ASM >= 2 + if (gcm_avx2_will_work()) { + if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { + gcm_use_impl(GCM_IMPL_AVX2); + } + } else +#endif if (gcm_avx_will_work()) { #ifdef HAVE_MOVBE if (zfs_movbe_available() == B_TRUE) { @@ -835,7 +832,7 @@ gcm_impl_init(void) } #endif if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { - gcm_set_avx(B_TRUE); + gcm_use_impl(GCM_IMPL_AVX); } } #endif @@ -852,6 +849,7 @@ static const struct { { "fastest", IMPL_FASTEST }, #ifdef CAN_USE_GCM_ASM { "avx", IMPL_AVX }, + { "avx2-vaes", IMPL_AVX2 }, #endif }; @@ -887,7 +885,13 @@ gcm_impl_set(const char *val) /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM +#if CAN_USE_GCM_ASM >= 2 /* Ignore avx implementation if it won't work. 
*/ + if (gcm_impl_opts[i].sel == IMPL_AVX2 && + !gcm_avx2_will_work()) { + continue; + } +#endif if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } @@ -915,11 +919,17 @@ gcm_impl_set(const char *val) * Use the avx implementation if available and the requested one is * avx or fastest. */ +#if CAN_USE_GCM_ASM >= 2 + if (gcm_avx2_will_work() == B_TRUE && + (impl == IMPL_AVX2 || impl == IMPL_FASTEST)) { + gcm_use_impl(GCM_IMPL_AVX2); + } else +#endif if (gcm_avx_will_work() == B_TRUE && (impl == IMPL_AVX || impl == IMPL_FASTEST)) { - gcm_set_avx(B_TRUE); + gcm_use_impl(GCM_IMPL_AVX); } else { - gcm_set_avx(B_FALSE); + gcm_use_impl(GCM_IMPL_GENERIC); } #endif @@ -952,6 +962,12 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM /* Ignore avx implementation if it won't work. */ +#if CAN_USE_GCM_ASM >= 2 + if (gcm_impl_opts[i].sel == IMPL_AVX2 && + !gcm_avx2_will_work()) { + continue; + } +#endif if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } @@ -993,9 +1009,6 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); /* Clear the FPU registers since they hold sensitive internal state. */ #define clear_fpu_regs() clear_fpu_regs_avx() -#define GHASH_AVX(ctx, in, len) \ - gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \ - in, len) #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) @@ -1010,20 +1023,77 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; +/* + * GCM definitions: uint128_t is copied from include/crypto/modes.h + * Avoiding u128 because it is already defined in kernel sources. 
+ */ +typedef struct { + uint64_t hi, lo; +} uint128_t; + extern void ASMABI clear_fpu_regs_avx(void); extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst); extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr, const uint32_t pt[4], uint32_t ct[4]); extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI gcm_init_vpclmulqdq_avx2(uint128_t Htable[16], + const uint64_t H[2]); +#endif extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI gcm_ghash_vpclmulqdq_avx2(uint64_t ghash[2], + const uint64_t *Htable, const uint8_t *in, size_t len); +#endif +static inline void GHASH_AVX(gcm_ctx_t *ctx, const uint8_t *in, size_t len) +{ + switch (ctx->impl) { +#if CAN_USE_GCM_ASM >= 2 + case GCM_IMPL_AVX2: + gcm_ghash_vpclmulqdq_avx2(ctx->gcm_ghash, + (const uint64_t *)ctx->gcm_Htable, in, len); + break; +#endif + + case GCM_IMPL_AVX: + gcm_ghash_avx(ctx->gcm_ghash, + (const uint64_t *)ctx->gcm_Htable, in, len); + break; + + default: + VERIFY(B_FALSE); + } +} +typedef size_t ASMABI aesni_gcm_encrypt_impl(const uint8_t *, uint8_t *, + size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *); extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI aes_gcm_enc_update_vaes_avx2(const uint8_t *in, + uint8_t *out, size_t len, const void *key, const uint8_t ivec[16], + const uint128_t Htable[16], uint8_t Xi[16]); +#endif +typedef size_t ASMABI aesni_gcm_decrypt_impl(const uint8_t *, uint8_t *, + size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *); extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); +#if CAN_USE_GCM_ASM >= 2 +extern void ASMABI aes_gcm_dec_update_vaes_avx2(const uint8_t *in, + uint8_t *out, 
size_t len, const void *key, const uint8_t ivec[16], + const uint128_t Htable[16], uint8_t Xi[16]); +#endif + +static inline boolean_t +gcm_avx2_will_work(void) +{ + return (kfpu_allowed() && + zfs_avx2_available() && zfs_vaes_available() && + zfs_vpclmulqdq_available()); +} static inline boolean_t gcm_avx_will_work(void) @@ -1035,33 +1105,67 @@ gcm_avx_will_work(void) } static inline void -gcm_set_avx(boolean_t val) +gcm_use_impl(gcm_impl impl) { - if (gcm_avx_will_work() == B_TRUE) { - atomic_swap_32(&gcm_use_avx, val); + switch (impl) { +#if CAN_USE_GCM_ASM >= 2 + case GCM_IMPL_AVX2: + if (gcm_avx2_will_work() == B_TRUE) { + atomic_swap_32(&gcm_impl_used, impl); + return; + } + + zfs_fallthrough; +#endif + + case GCM_IMPL_AVX: + if (gcm_avx_will_work() == B_TRUE) { + atomic_swap_32(&gcm_impl_used, impl); + return; + } + + zfs_fallthrough; + + default: + atomic_swap_32(&gcm_impl_used, GCM_IMPL_GENERIC); } } static inline boolean_t -gcm_toggle_avx(void) +gcm_impl_will_work(gcm_impl impl) { - if (gcm_avx_will_work() == B_TRUE) { - return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); - } else { - return (B_FALSE); + switch (impl) { +#if CAN_USE_GCM_ASM >= 2 + case GCM_IMPL_AVX2: + return (gcm_avx2_will_work()); +#endif + + case GCM_IMPL_AVX: + return (gcm_avx_will_work()); + + default: + return (B_TRUE); } } -static inline size_t -gcm_simd_get_htab_size(boolean_t simd_mode) +static inline gcm_impl +gcm_toggle_impl(void) { - switch (simd_mode) { - case B_TRUE: - return (2 * 6 * 2 * sizeof (uint64_t)); + gcm_impl current_impl, new_impl; + do { /* handle races */ + current_impl = atomic_load_32(&gcm_impl_used); + new_impl = current_impl; + while (B_TRUE) { /* handle incompatible implementations */ + new_impl = (new_impl + 1) % GCM_IMPL_MAX; + if (gcm_impl_will_work(new_impl)) { + break; + } + } - default: - return (0); - } + } while (atomic_cas_32(&gcm_impl_used, current_impl, new_impl) != + current_impl); + + return (new_impl); } @@ -1077,6 +1181,50 @@
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; } +static size_t aesni_gcm_encrypt_avx(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + (void) Htable; + return (aesni_gcm_encrypt(in, out, len, key, iv, Xip)); +} + +#if CAN_USE_GCM_ASM >= 2 +// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four +// bits of a |size_t|. +// This is from boringssl/crypto/fipsmodule/aes/gcm.cc.inc +static const size_t kSizeTWithoutLower4Bits = (size_t)-16; + +/* The following CRYPTO methods are from boringssl/crypto/internal.h */ +static inline uint32_t CRYPTO_bswap4(uint32_t x) { + return (__builtin_bswap32(x)); +} + +static inline uint32_t CRYPTO_load_u32_be(const void *in) { + uint32_t v; + memcpy(&v, in, sizeof (v)); + return (CRYPTO_bswap4(v)); +} + +static inline void CRYPTO_store_u32_be(void *out, uint32_t v) { + v = CRYPTO_bswap4(v); + memcpy(out, &v, sizeof (v)); +} + +static size_t aesni_gcm_encrypt_avx2(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + uint8_t *ivec = (uint8_t *)iv; + len &= kSizeTWithoutLower4Bits; + aes_gcm_enc_update_vaes_avx2(in, out, len, key, ivec, + (const uint128_t *)Htable, (uint8_t *)Xip); + CRYPTO_store_u32_be(&ivec[12], + CRYPTO_load_u32_be(&ivec[12]) + len / 16); + return (len); +} +#endif /* if CAN_USE_GCM_ASM >= 2 */ + /* * Encrypt multiple blocks of data in GCM mode. * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines @@ -1091,8 +1239,15 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, size_t done = 0; uint8_t *datap = (uint8_t *)data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + aesni_gcm_encrypt_impl *encrypt_blocks = +#if CAN_USE_GCM_ASM >= 2 + ctx->impl == GCM_IMPL_AVX2 ? 
+ aesni_gcm_encrypt_avx2 : +#endif + aesni_gcm_encrypt_avx; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint64_t *ghash = ctx->gcm_ghash; + uint64_t *htable = ctx->gcm_Htable; uint64_t *cb = ctx->gcm_cb; uint8_t *ct_buf = NULL; uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; @@ -1156,8 +1311,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, /* Do the bulk encryption in chunk_size blocks. */ for (; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); - done = aesni_gcm_encrypt( - datap, ct_buf, chunk_size, key, cb, ghash); + done = encrypt_blocks( + datap, ct_buf, chunk_size, key, cb, htable, ghash); clear_fpu_regs(); kfpu_end(); @@ -1180,7 +1335,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, /* Bulk encrypt the remaining data. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { - done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); + done = encrypt_blocks(datap, ct_buf, bleft, key, cb, htable, + ghash); if (done == 0) { rv = CRYPTO_FAILED; goto out; @@ -1293,6 +1449,29 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) return (CRYPTO_SUCCESS); } +static size_t aesni_gcm_decrypt_avx(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + (void) Htable; + return (aesni_gcm_decrypt(in, out, len, key, iv, Xip)); +} + +#if CAN_USE_GCM_ASM >= 2 +static size_t aesni_gcm_decrypt_avx2(const uint8_t *in, uint8_t *out, + size_t len, const void *key, uint64_t *iv, const uint64_t *Htable, + uint64_t *Xip) +{ + uint8_t *ivec = (uint8_t *)iv; + len &= kSizeTWithoutLower4Bits; + aes_gcm_dec_update_vaes_avx2(in, out, len, key, ivec, + (const uint128_t *)Htable, (uint8_t *)Xip); + CRYPTO_store_u32_be(&ivec[12], + CRYPTO_load_u32_be(&ivec[12]) + len / 16); + return (len); +} +#endif /* if CAN_USE_GCM_ASM >= 2 */ + /* * Finalize decryption: We just have accumulated crypto text, so now we * decrypt it here inplace. 
@@ -1306,10 +1485,17 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) B_FALSE); size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + aesni_gcm_decrypt_impl *decrypt_blocks = +#if CAN_USE_GCM_ASM >= 2 + ctx->impl == GCM_IMPL_AVX2 ? + aesni_gcm_decrypt_avx2 : +#endif + aesni_gcm_decrypt_avx; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; uint8_t *datap = ctx->gcm_pt_buf; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint32_t *cb = (uint32_t *)ctx->gcm_cb; + uint64_t *htable = ctx->gcm_Htable; uint64_t *ghash = ctx->gcm_ghash; uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; int rv = CRYPTO_SUCCESS; @@ -1322,8 +1508,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) */ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); - done = aesni_gcm_decrypt(datap, datap, chunk_size, - (const void *)key, ctx->gcm_cb, ghash); + done = decrypt_blocks(datap, datap, chunk_size, + (const void *)key, ctx->gcm_cb, htable, ghash); clear_fpu_regs(); kfpu_end(); if (done != chunk_size) { @@ -1334,8 +1520,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) /* Decrypt remainder, which is less than chunk size, in one go. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { - done = aesni_gcm_decrypt(datap, datap, bleft, - (const void *)key, ctx->gcm_cb, ghash); + done = decrypt_blocks(datap, datap, bleft, + (const void *)key, ctx->gcm_cb, htable, ghash); if (done == 0) { clear_fpu_regs(); kfpu_end(); @@ -1424,13 +1610,42 @@ gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len, ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==, B_FALSE); + size_t htab_len = 0; +#if CAN_USE_GCM_ASM >= 2 + if (ctx->impl == GCM_IMPL_AVX2) { + /* + * BoringSSL's API specifies uint128_t[16] for htab; but only + * uint128_t[12] are used. 
+ * See https://github.com/google/boringssl/blob/ + * 813840dd094f9e9c1b00a7368aa25e656554221f1/crypto/fipsmodule/ + * modes/asm/aes-gcm-avx2-x86_64.pl#L198-L200 + */ + htab_len = (2 * 8 * sizeof (uint128_t)); + } else +#endif /* CAN_USE_GCM_ASM >= 2 */ + { + htab_len = (2 * 6 * sizeof (uint128_t)); + } + + ctx->gcm_Htable = kmem_alloc(htab_len, KM_SLEEP); + if (ctx->gcm_Htable == NULL) { + return (CRYPTO_HOST_MEMORY); + } + /* Init H (encrypt zero block) and create the initial counter block. */ memset(H, 0, sizeof (ctx->gcm_H)); kfpu_begin(); aes_encrypt_intel(keysched, aes_rounds, (const uint32_t *)H, (uint32_t *)H); - gcm_init_htab_avx(ctx->gcm_Htable, H); +#if CAN_USE_GCM_ASM >= 2 + if (ctx->impl == GCM_IMPL_AVX2) { + gcm_init_vpclmulqdq_avx2((uint128_t *)ctx->gcm_Htable, H); + } else +#endif /* if CAN_USE_GCM_ASM >= 2 */ + { + gcm_init_htab_avx(ctx->gcm_Htable, H); + } if (iv_len == 12) { memcpy(cb, iv, 12); diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c index 343591cd9691..ef3c1806e4b6 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/modes.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c @@ -171,7 +171,7 @@ gcm_clear_ctx(gcm_ctx_t *ctx) explicit_memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder)); explicit_memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H)); #if defined(CAN_USE_GCM_ASM) - if (ctx->gcm_use_avx == B_TRUE) { + if (ctx->impl != GCM_IMPL_GENERIC) { ASSERT3P(ctx->gcm_Htable, !=, NULL); explicit_memset(ctx->gcm_Htable, 0, ctx->gcm_htab_len); kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl new file mode 100644 index 000000000000..04c03a37e0cb --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl @@ -0,0 +1,253 @@ +BoringSSL is a fork of OpenSSL. 
As such, large parts of it fall under OpenSSL +licensing. Files that are completely new have a Google copyright and an ISC +license. This license is reproduced at the bottom of this file. + +Contributors to BoringSSL are required to follow the CLA rules for Chromium: +https://cla.developers.google.com/clas + +Files in third_party/ have their own licenses, as described therein. The MIT +license, for third_party/fiat, which, unlike other third_party directories, is +compiled into non-test libraries, is included below. + +The OpenSSL toolkit stays under a dual license, i.e. both the conditions of the +OpenSSL License and the original SSLeay license apply to the toolkit. See below +for the actual license texts. Actually both licenses are BSD-style Open Source +licenses. In case of any license issues related to OpenSSL please contact +openssl-core@openssl.org. + +The following are Google-internal bug numbers where explicit permission from +some authors is recorded for use of their work. (This is purely for our own +record keeping.) + 27287199 + 27287880 + 27287883 + 263291445 + + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. 
If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + + +ISC license used for completely new code in BoringSSL: + +/* Copyright 2015 The BoringSSL Authors + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + + +The code in third_party/fiat carries the MIT license: + +Copyright (c) 2015-2016 the fiat-crypto authors (see +https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Licenses for support code +------------------------- + +Parts of the TLS test suite are under the Go license. This code is not included +in BoringSSL (i.e. libcrypto and libssl) when compiled, however, so +distributing code linked against BoringSSL does not trigger this license: + +Copyright (c) 2009 The Go Authors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +BoringSSL uses the Chromium test infrastructure to run a continuous build, +trybots etc. The scripts which manage this, and the script for generating build +metadata, are under the Chromium license. Distributing code linked against +BoringSSL does not trigger this license. + +Copyright 2015 The Chromium Authors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl.descrip b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl.descrip new file mode 100644 index 000000000000..f63a67a4d2ae --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.boringssl.descrip @@ -0,0 +1 @@ +PORTIONS OF AES GCM and GHASH FUNCTIONALITY diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S new file mode 100644 index 000000000000..3d1b045127e2 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-avx2-vaes.S @@ -0,0 +1,1323 @@ +// SPDX-License-Identifier: Apache-2.0 +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#if defined(__x86_64__) && defined(HAVE_AVX) && \ + defined(HAVE_VAES) && defined(HAVE_VPCLMULQDQ) + +#define _ASM +#include <sys/asm_linkage.h> + +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) + +.section .rodata +.balign 16 + + +.Lbswap_mask: +.quad 0x08090a0b0c0d0e0f, 0x0001020304050607 + + + + + + + + +.Lgfpoly: +.quad 1, 0xc200000000000000 + + +.Lgfpoly_and_internal_carrybit: +.quad 1, 0xc200000000000001 + +.balign 32 + +.Lctr_pattern: +.quad 0, 0 +.quad 1, 0 +.Linc_2blocks: +.quad 2, 0 +.quad 2, 0 + +ENTRY_ALIGN(gcm_init_vpclmulqdq_avx2, 32) +.cfi_startproc + +ENDBR + + + + + + vmovdqu (%rsi),%xmm3 + // KCF/ICP stores H in network byte order with the hi qword first + // so we need to swap all bytes, not the 2 qwords. 
+ vmovdqu .Lbswap_mask(%rip),%xmm4 + vpshufb %xmm4,%xmm3,%xmm3 + + + + + + vpshufd $0xd3,%xmm3,%xmm0 + vpsrad $31,%xmm0,%xmm0 + vpaddq %xmm3,%xmm3,%xmm3 + vpand .Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + + vbroadcasti128 .Lgfpoly(%rip),%ymm6 + + + vpclmulqdq $0x00,%xmm3,%xmm3,%xmm0 + vpclmulqdq $0x11,%xmm3,%xmm3,%xmm5 + vpclmulqdq $0x01,%xmm0,%xmm6,%xmm1 + vpshufd $0x4e,%xmm0,%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm6,%xmm0 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm5,%xmm5 + vpxor %xmm0,%xmm5,%xmm5 + + + + vinserti128 $1,%xmm3,%ymm5,%ymm3 + vinserti128 $1,%xmm5,%ymm5,%ymm5 + + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + + + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,64(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128+32(%rdi) + + + vpclmulqdq $0x00,%ymm5,%ymm4,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm4,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm4,%ymm3 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm3,%ymm3 + vpxor %ymm0,%ymm3,%ymm3 + + vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0 + vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1 + vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2 + vpshufd $0x4e,%ymm0,%ymm0 + vpxor %ymm0,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4 + vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0 + vpshufd 
$0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,0(%rdi) + + + + vpunpcklqdq %ymm3,%ymm4,%ymm0 + vpunpckhqdq %ymm3,%ymm4,%ymm1 + vpxor %ymm1,%ymm0,%ymm0 + vmovdqu %ymm0,128(%rdi) + + vzeroupper + RET + +.cfi_endproc +SET_SIZE(gcm_init_vpclmulqdq_avx2) +ENTRY_ALIGN(gcm_gmult_vpclmulqdq_avx2, 32) +.cfi_startproc + +ENDBR + + + + vmovdqu (%rdi),%xmm0 + vmovdqu .Lbswap_mask(%rip),%xmm1 + vmovdqu 128-16(%rsi),%xmm2 + vmovdqu .Lgfpoly(%rip),%xmm3 + vpshufb %xmm1,%xmm0,%xmm0 + + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm4 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm5 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm6 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm4,%xmm3,%xmm6 + vpshufd $0x4e,%xmm4,%xmm4 + vpxor %xmm4,%xmm5,%xmm5 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x01,%xmm5,%xmm3,%xmm4 + vpshufd $0x4e,%xmm5,%xmm5 + vpxor %xmm5,%xmm0,%xmm0 + vpxor %xmm4,%xmm0,%xmm0 + + + vpshufb %xmm1,%xmm0,%xmm0 + vmovdqu %xmm0,(%rdi) + + + RET + +.cfi_endproc +SET_SIZE(gcm_gmult_vpclmulqdq_avx2) +ENTRY_ALIGN(gcm_ghash_vpclmulqdq_avx2, 32) +.cfi_startproc + +ENDBR + + + + + + + vmovdqu .Lbswap_mask(%rip),%xmm6 + vmovdqu .Lgfpoly(%rip),%xmm7 + + + vmovdqu (%rdi),%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + + + cmpq $32,%rcx + jb .Lghash_lastblock + + + + vinserti128 $1,%xmm6,%ymm6,%ymm6 + vinserti128 $1,%xmm7,%ymm7,%ymm7 + + cmpq $127,%rcx + jbe .Lghash_loop_1x + + + vmovdqu 128(%rsi),%ymm8 + vmovdqu 128+32(%rsi),%ymm9 +.Lghash_loop_4x: + + vmovdqu 0(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 0(%rsi),%ymm2 + vpxor %ymm5,%ymm1,%ymm1 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm8,%ymm0,%ymm4 + + vmovdqu 32(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 32(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq 
%ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm8,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vmovdqu 64(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x00,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + + vmovdqu 96(%rdx),%ymm1 + vpshufb %ymm6,%ymm1,%ymm1 + vmovdqu 96(%rsi),%ymm2 + vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vpunpckhqdq %ymm1,%ymm1,%ymm0 + vpxor %ymm1,%ymm0,%ymm0 + vpclmulqdq $0x10,%ymm9,%ymm0,%ymm0 + vpxor %ymm0,%ymm4,%ymm4 + + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm5,%ymm4,%ymm4 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm3,%ymm2,%ymm0 + vpshufd $0x4e,%ymm3,%ymm3 + vpxor %ymm3,%ymm4,%ymm4 + vpxor %ymm0,%ymm4,%ymm4 + + vpclmulqdq $0x01,%ymm4,%ymm2,%ymm0 + vpshufd $0x4e,%ymm4,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm0,%ymm5,%ymm5 + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + + subq $-128,%rdx + addq $-128,%rcx + cmpq $127,%rcx + ja .Lghash_loop_4x + + + cmpq $32,%rcx + jb .Lghash_loop_1x_done +.Lghash_loop_1x: + vmovdqu (%rdx),%ymm0 + vpshufb %ymm6,%ymm0,%ymm0 + vpxor %ymm0,%ymm5,%ymm5 + vmovdqu 128-32(%rsi),%ymm0 + vpclmulqdq $0x00,%ymm0,%ymm5,%ymm1 + vpclmulqdq $0x01,%ymm0,%ymm5,%ymm2 + vpclmulqdq $0x10,%ymm0,%ymm5,%ymm3 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x01,%ymm1,%ymm7,%ymm3 + vpshufd $0x4e,%ymm1,%ymm1 + vpxor %ymm1,%ymm2,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x11,%ymm0,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm7,%ymm1 + vpshufd $0x4e,%ymm2,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm1,%ymm5,%ymm5 + + vextracti128 $1,%ymm5,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + addq $32,%rdx + subq $32,%rcx + cmpq $32,%rcx + jae .Lghash_loop_1x +.Lghash_loop_1x_done: + + +.Lghash_lastblock: + testq %rcx,%rcx + 
jz .Lghash_done + vmovdqu (%rdx),%xmm0 + vpshufb %xmm6,%xmm0,%xmm0 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 128-16(%rsi),%xmm0 + vpclmulqdq $0x00,%xmm0,%xmm5,%xmm1 + vpclmulqdq $0x01,%xmm0,%xmm5,%xmm2 + vpclmulqdq $0x10,%xmm0,%xmm5,%xmm3 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x01,%xmm1,%xmm7,%xmm3 + vpshufd $0x4e,%xmm1,%xmm1 + vpxor %xmm1,%xmm2,%xmm2 + vpxor %xmm3,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm5,%xmm5 + vpclmulqdq $0x01,%xmm2,%xmm7,%xmm1 + vpshufd $0x4e,%xmm2,%xmm2 + vpxor %xmm2,%xmm5,%xmm5 + vpxor %xmm1,%xmm5,%xmm5 + + +.Lghash_done: + + vpshufb %xmm6,%xmm5,%xmm5 + vmovdqu %xmm5,(%rdi) + + vzeroupper + RET + +.cfi_endproc +SET_SIZE(gcm_ghash_vpclmulqdq_avx2) +ENTRY_ALIGN(aes_gcm_enc_update_vaes_avx2, 32) +.cfi_startproc + +ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 +#ifdef BORINGSSL_DISPATCH_TEST +.extern BORINGSSL_function_hit +.hidden BORINGSSL_function_hit + movb $1,BORINGSSL_function_hit+6(%rip) +#endif + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 504(%rcx),%r10d // ICP has a larger offset for rounds. + leal -24(,%r10,4),%r10d // ICP uses 10,12,14 not 9,11,13 for rounds. 
+ + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func1 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + leaq 16(%rcx),%rax +.Lvaesenc_loop_first_4_vecs__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_first_4_vecs__func1 + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + addq $-128,%rdx + cmpq $127,%rdx + jbe .Lghash_last_ciphertext_4x__func1 +.balign 16 +.Lcrypt_loop_4x__func1: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func1 + je .Laes192__func1 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc 
%ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func1: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func1: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq 
$0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + subq $-128,%rsi + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) 
+ vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func1 +.Lghash_last_ciphertext_4x__func1: + + vmovdqu 0(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vmovdqu 32(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vmovdqu 64(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rsi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + subq $-128,%rsi +.Lcrypt_loop_4x_done__func1: + + testq %rdx,%rdx + jz .Ldone__func1 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor 
%xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func1 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_1__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %ymm0,%ymm13,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func1 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func1: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func1: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func1 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func1 + je .Lxor_two_blocks__func1 + 
+.Lxor_three_blocks__func1: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm12,%ymm12 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_two_blocks__func1: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func1 + +.Lxor_one_block__func1: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func1: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func1: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func1: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + RET + +.cfi_endproc +SET_SIZE(aes_gcm_enc_update_vaes_avx2) +ENTRY_ALIGN(aes_gcm_dec_update_vaes_avx2, 32) +.cfi_startproc + +ENDBR + pushq %r12 +.cfi_adjust_cfa_offset 8 
+.cfi_offset %r12,-16 + + movq 16(%rsp),%r12 + vbroadcasti128 .Lbswap_mask(%rip),%ymm0 + + + + vmovdqu (%r12),%xmm1 + vpshufb %xmm0,%xmm1,%xmm1 + vbroadcasti128 (%r8),%ymm11 + vpshufb %ymm0,%ymm11,%ymm11 + + + + movl 504(%rcx),%r10d // ICP has a larger offset for rounds. + leal -24(,%r10,4),%r10d // ICP uses 10,12,14 not 9,11,13 for rounds. + + + + + leaq 96(%rcx,%r10,4),%r11 + vbroadcasti128 (%rcx),%ymm9 + vbroadcasti128 (%r11),%ymm10 + + + vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11 + + + + cmpq $127,%rdx + jbe .Lcrypt_loop_4x_done__func2 + + vmovdqu 128(%r9),%ymm7 + vmovdqu 128+32(%r9),%ymm8 +.balign 16 +.Lcrypt_loop_4x__func2: + + + + + vmovdqu .Linc_2blocks(%rip),%ymm2 + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm14 + vpaddd %ymm2,%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm15 + vpaddd %ymm2,%ymm11,%ymm11 + + + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + vpxor %ymm9,%ymm14,%ymm14 + vpxor %ymm9,%ymm15,%ymm15 + + cmpl $24,%r10d + jl .Laes128__func2 + je .Laes192__func2 + + vbroadcasti128 -208(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -192(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes192__func2: + vbroadcasti128 -176(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vbroadcasti128 -160(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + +.Laes128__func2: + prefetcht0 512(%rdi) + prefetcht0 512+64(%rdi) + + vmovdqu 0(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 0(%r9),%ymm4 + vpxor %ymm1,%ymm3,%ymm3 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1 + vpunpckhqdq 
%ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6 + + vbroadcasti128 -144(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vbroadcasti128 -128(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 32(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 32(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -112(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vmovdqu 64(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + vmovdqu 64(%r9),%ymm4 + + vbroadcasti128 -96(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -80(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2 + vpxor %ymm2,%ymm6,%ymm6 + + + vmovdqu 96(%rdi),%ymm3 + vpshufb %ymm0,%ymm3,%ymm3 + + vbroadcasti128 -64(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vmovdqu 96(%r9),%ymm4 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm5,%ymm5 + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vpunpckhqdq %ymm3,%ymm3,%ymm2 + vpxor %ymm3,%ymm2,%ymm2 + vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2 + vpxor 
%ymm2,%ymm6,%ymm6 + + vbroadcasti128 -48(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm1,%ymm6,%ymm6 + + + vbroadcasti128 .Lgfpoly(%rip),%ymm4 + vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm2,%ymm6,%ymm6 + + vbroadcasti128 -32(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + + vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm1,%ymm1 + vpxor %ymm2,%ymm1,%ymm1 + + vbroadcasti128 -16(%r11),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + vaesenc %ymm2,%ymm14,%ymm14 + vaesenc %ymm2,%ymm15,%ymm15 + + vextracti128 $1,%ymm1,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + + + + vpxor 0(%rdi),%ymm10,%ymm2 + vpxor 32(%rdi),%ymm10,%ymm3 + vpxor 64(%rdi),%ymm10,%ymm5 + vpxor 96(%rdi),%ymm10,%ymm6 + vaesenclast %ymm2,%ymm12,%ymm12 + vaesenclast %ymm3,%ymm13,%ymm13 + vaesenclast %ymm5,%ymm14,%ymm14 + vaesenclast %ymm6,%ymm15,%ymm15 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + vmovdqu %ymm14,64(%rsi) + vmovdqu %ymm15,96(%rsi) + + subq $-128,%rdi + subq $-128,%rsi + addq $-128,%rdx + cmpq $127,%rdx + ja .Lcrypt_loop_4x__func2 +.Lcrypt_loop_4x_done__func2: + + testq %rdx,%rdx + jz .Ldone__func2 + + + + + + leaq 128(%r9),%r8 + subq %rdx,%r8 + + + vpxor %xmm5,%xmm5,%xmm5 + vpxor %xmm6,%xmm6,%xmm6 + vpxor %xmm7,%xmm7,%xmm7 + + cmpq $64,%rdx + jb .Llessthan64bytes__func2 + + + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_1__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne 
.Lvaesenc_loop_tail_1__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%ymm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %ymm3,%ymm13,%ymm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %ymm13,32(%rsi) + + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %ymm0,%ymm3,%ymm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%ymm3 + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7 + vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + + addq $64,%r8 + addq $64,%rdi + addq $64,%rsi + subq $64,%rdx + jz .Lreduce__func2 + + vpxor %xmm1,%xmm1,%xmm1 + + +.Llessthan64bytes__func2: + vpshufb %ymm0,%ymm11,%ymm12 + vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11 + vpshufb %ymm0,%ymm11,%ymm13 + vpxor %ymm9,%ymm12,%ymm12 + vpxor %ymm9,%ymm13,%ymm13 + leaq 16(%rcx),%rax +.Lvaesenc_loop_tail_2__func2: + vbroadcasti128 (%rax),%ymm2 + vaesenc %ymm2,%ymm12,%ymm12 + vaesenc %ymm2,%ymm13,%ymm13 + addq $16,%rax + cmpq %rax,%r11 + jne .Lvaesenc_loop_tail_2__func2 + vaesenclast %ymm10,%ymm12,%ymm12 + vaesenclast %ymm10,%ymm13,%ymm13 + + + + + cmpq $32,%rdx + jb .Lxor_one_block__func2 + je .Lxor_two_blocks__func2 + +.Lxor_three_blocks__func2: + vmovdqu 0(%rdi),%ymm2 + vmovdqu 32(%rdi),%xmm3 + vpxor %ymm2,%ymm12,%ymm12 + vpxor %xmm3,%xmm13,%xmm13 + vmovdqu %ymm12,0(%rsi) + vmovdqu %xmm13,32(%rsi) + + vpshufb %ymm0,%ymm2,%ymm12 + vpshufb %xmm0,%xmm3,%xmm13 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + vmovdqu 32(%r8),%xmm3 + vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4 + vpxor 
%ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4 + vpxor %ymm4,%ymm7,%ymm7 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_two_blocks__func2: + vmovdqu (%rdi),%ymm2 + vpxor %ymm2,%ymm12,%ymm12 + vmovdqu %ymm12,(%rsi) + vpshufb %ymm0,%ymm2,%ymm12 + vpxor %ymm1,%ymm12,%ymm12 + vmovdqu (%r8),%ymm2 + jmp .Lghash_mul_one_vec_unreduced__func2 + +.Lxor_one_block__func2: + vmovdqu (%rdi),%xmm2 + vpxor %xmm2,%xmm12,%xmm12 + vmovdqu %xmm12,(%rsi) + vpshufb %xmm0,%xmm2,%xmm12 + vpxor %xmm1,%xmm12,%xmm12 + vmovdqu (%r8),%xmm2 + +.Lghash_mul_one_vec_unreduced__func2: + vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm5,%ymm5 + vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm6,%ymm6 + vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4 + vpxor %ymm4,%ymm7,%ymm7 + +.Lreduce__func2: + + vbroadcasti128 .Lgfpoly(%rip),%ymm2 + vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3 + vpshufd $0x4e,%ymm5,%ymm5 + vpxor %ymm5,%ymm6,%ymm6 + vpxor %ymm3,%ymm6,%ymm6 + vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3 + vpshufd $0x4e,%ymm6,%ymm6 + vpxor %ymm6,%ymm7,%ymm7 + vpxor %ymm3,%ymm7,%ymm7 + vextracti128 $1,%ymm7,%xmm1 + vpxor %xmm7,%xmm1,%xmm1 + +.Ldone__func2: + + vpshufb %xmm0,%xmm1,%xmm1 + vmovdqu %xmm1,(%r12) + + vzeroupper + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + RET + +.cfi_endproc +SET_SIZE(aes_gcm_dec_update_vaes_avx2) + +#endif /* !_WIN32 || _KERNEL */ + +/* Mark the stack non-executable. */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_AES) ... 
*/ diff --git a/sys/contrib/openzfs/module/icp/include/modes/modes.h b/sys/contrib/openzfs/module/icp/include/modes/modes.h index ca734cf4f045..de11d9eafafb 100644 --- a/sys/contrib/openzfs/module/icp/include/modes/modes.h +++ b/sys/contrib/openzfs/module/icp/include/modes/modes.h @@ -42,7 +42,7 @@ extern "C" { */ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) -#define CAN_USE_GCM_ASM +#define CAN_USE_GCM_ASM (HAVE_VAES && HAVE_VPCLMULQDQ ? 2 : 1) extern boolean_t gcm_avx_can_use_movbe; #endif @@ -129,6 +129,15 @@ typedef struct ccm_ctx { #define ccm_copy_to ccm_common.cc_copy_to #define ccm_flags ccm_common.cc_flags +#ifdef CAN_USE_GCM_ASM +typedef enum gcm_impl { + GCM_IMPL_GENERIC = 0, + GCM_IMPL_AVX, + GCM_IMPL_AVX2, + GCM_IMPL_MAX, +} gcm_impl; +#endif + /* * gcm_tag_len: Length of authentication tag. * @@ -174,7 +183,7 @@ typedef struct gcm_ctx { uint64_t gcm_len_a_len_c[2]; uint8_t *gcm_pt_buf; #ifdef CAN_USE_GCM_ASM - boolean_t gcm_use_avx; + enum gcm_impl impl; #endif } gcm_ctx_t; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c index c114db14a916..b218c0da8125 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c @@ -112,7 +112,6 @@ static int zfs__fini(void); static void zfs_shutdown(void *, int); static eventhandler_tag zfs_shutdown_event_tag; -static eventhandler_tag zfs_mountroot_event_tag; #define ZFS_MIN_KSTACK_PAGES 4 @@ -311,9 +310,6 @@ zfs_modevent(module_t mod, int type, void *unused __unused) zfs_shutdown_event_tag = EVENTHANDLER_REGISTER( shutdown_post_sync, zfs_shutdown, NULL, SHUTDOWN_PRI_FIRST); - zfs_mountroot_event_tag = EVENTHANDLER_REGISTER( - mountroot, spa_boot_init, NULL, - SI_ORDER_ANY); } return (err); case MOD_UNLOAD: @@ -322,9 +318,6 @@ zfs_modevent(module_t mod, int type, void *unused __unused) if (zfs_shutdown_event_tag != NULL) 
EVENTHANDLER_DEREGISTER(shutdown_post_sync, zfs_shutdown_event_tag); - if (zfs_mountroot_event_tag != NULL) - EVENTHANDLER_DEREGISTER(mountroot, - zfs_mountroot_event_tag); } return (err); case MOD_SHUTDOWN: diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 265dfd55fc4d..0dd2ecd7fd8d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -31,7 +31,7 @@ * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ @@ -196,7 +196,6 @@ DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); static int zvol_geom_open(struct g_provider *pp, int flag, int count); static int zvol_geom_close(struct g_provider *pp, int flag, int count); -static void zvol_geom_destroy(zvol_state_t *zv); static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); static void zvol_geom_bio_start(struct bio *bp); static int zvol_geom_bio_getattr(struct bio *bp); @@ -226,25 +225,14 @@ zvol_geom_open(struct g_provider *pp, int flag, int count) } retry: - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - /* - * Obtain a copy of private under zvol_state_lock to make sure either - * the result of zvol free code setting private to NULL is observed, - * or the zv is protected from being freed because of the positive - * zv_open_count. 
- */ - zv = pp->private; - if (zv == NULL) { - rw_exit(&zvol_state_lock); - err = SET_ERROR(ENXIO); - goto out_locked; - } + zv = atomic_load_ptr(&pp->private); + if (zv == NULL) + return (SET_ERROR(ENXIO)); mutex_enter(&zv->zv_state_lock); if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { - rw_exit(&zvol_state_lock); err = SET_ERROR(ENXIO); - goto out_zv_locked; + goto out_locked; } ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); @@ -257,8 +245,24 @@ retry: drop_suspend = B_TRUE; if (!rw_tryenter(&zv->zv_suspend_lock, ZVOL_RW_READER)) { mutex_exit(&zv->zv_state_lock); + + /* + * Removal may happen while the locks are down, so + * we can't trust zv any longer; we have to start over. + */ + zv = atomic_load_ptr(&pp->private); + if (zv == NULL) + return (SET_ERROR(ENXIO)); + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + if (zv->zv_zso->zso_dying || + zv->zv_flags & ZVOL_REMOVING) { + err = SET_ERROR(ENXIO); + goto out_locked; + } + /* Check to see if zv_suspend_lock is needed. 
*/ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -266,7 +270,6 @@ retry: } } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -294,7 +297,7 @@ retry: if (drop_namespace) mutex_exit(&spa_namespace_lock); if (err) - goto out_zv_locked; + goto out_locked; pp->mediasize = zv->zv_volsize; pp->stripeoffset = 0; pp->stripesize = zv->zv_volblocksize; @@ -329,9 +332,8 @@ out_opened: zvol_last_close(zv); wakeup(zv); } -out_zv_locked: - mutex_exit(&zv->zv_state_lock); out_locked: + mutex_exit(&zv->zv_state_lock); if (drop_suspend) rw_exit(&zv->zv_suspend_lock); return (err); @@ -345,12 +347,9 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) boolean_t drop_suspend = B_TRUE; int new_open_count; - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - zv = pp->private; - if (zv == NULL) { - rw_exit(&zvol_state_lock); + zv = atomic_load_ptr(&pp->private); + if (zv == NULL) return (SET_ERROR(ENXIO)); - } mutex_enter(&zv->zv_state_lock); if (zv->zv_flags & ZVOL_EXCL) { @@ -377,6 +376,15 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in zvol_geom_open(), we don't check if + * removal started here, because we might be one of the + * openers that needs to be thrown out! If we're the + * last, we need to call zvol_last_close() below to + * finish cleanup. So, no special treatment for us. + */ + /* Check to see if zv_suspend_lock is needed. 
*/ new_open_count = zv->zv_open_count - count; if (new_open_count != 0) { @@ -387,7 +395,6 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -408,20 +415,6 @@ zvol_geom_close(struct g_provider *pp, int flag, int count) return (0); } -static void -zvol_geom_destroy(zvol_state_t *zv) -{ - struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; - struct g_provider *pp = zsg->zsg_provider; - - ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_GEOM); - - g_topology_assert(); - - zsg->zsg_provider = NULL; - g_wither_geom(pp->geom, ENXIO); -} - void zvol_wait_close(zvol_state_t *zv) { @@ -454,7 +447,7 @@ zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace) ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", pp->name, acr, acw, ace)); - if (pp->private == NULL) { + if (atomic_load_ptr(&pp->private) == NULL) { if (acr <= 0 && acw <= 0 && ace <= 0) return (0); return (pp->error); @@ -921,25 +914,14 @@ zvol_cdev_open(struct cdev *dev, int flags, int fmt, struct thread *td) boolean_t drop_suspend = B_FALSE; retry: - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - /* - * Obtain a copy of si_drv2 under zvol_state_lock to make sure either - * the result of zvol free code setting si_drv2 to NULL is observed, - * or the zv is protected from being freed because of the positive - * zv_open_count. 
- */ - zv = dev->si_drv2; - if (zv == NULL) { - rw_exit(&zvol_state_lock); - err = SET_ERROR(ENXIO); - goto out_locked; - } + zv = atomic_load_ptr(&dev->si_drv2); + if (zv == NULL) + return (SET_ERROR(ENXIO)); mutex_enter(&zv->zv_state_lock); - if (zv->zv_zso->zso_dying) { - rw_exit(&zvol_state_lock); + if (zv->zv_zso->zso_dying || zv->zv_flags & ZVOL_REMOVING) { err = SET_ERROR(ENXIO); - goto out_zv_locked; + goto out_locked; } ASSERT3S(zv->zv_volmode, ==, ZFS_VOLMODE_DEV); @@ -954,6 +936,13 @@ retry: mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + /* Removal started while locks were down. */ + err = SET_ERROR(ENXIO); + goto out_locked; + } + /* Check to see if zv_suspend_lock is needed. */ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -961,7 +950,6 @@ retry: } } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -989,7 +977,7 @@ retry: if (drop_namespace) mutex_exit(&spa_namespace_lock); if (err) - goto out_zv_locked; + goto out_locked; } ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -1016,9 +1004,8 @@ out_opened: zvol_last_close(zv); wakeup(zv); } -out_zv_locked: - mutex_exit(&zv->zv_state_lock); out_locked: + mutex_exit(&zv->zv_state_lock); if (drop_suspend) rw_exit(&zv->zv_suspend_lock); return (err); @@ -1030,12 +1017,9 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) zvol_state_t *zv; boolean_t drop_suspend = B_TRUE; - rw_enter(&zvol_state_lock, ZVOL_RW_READER); - zv = dev->si_drv2; - if (zv == NULL) { - rw_exit(&zvol_state_lock); + zv = atomic_load_ptr(&dev->si_drv2); + if (zv == NULL) return (SET_ERROR(ENXIO)); - } mutex_enter(&zv->zv_state_lock); if (zv->zv_flags & ZVOL_EXCL) { @@ -1060,6 +1044,15 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); 
mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in zvol_cdev_open(), we don't check if + * removal started here, because we might be one of the + * openers that needs to be thrown out! If we're the + * last, we need to call zvol_last_close() below to + * finish cleanup. So, no special treatment for us. + */ + /* Check to see if zv_suspend_lock is needed. */ if (zv->zv_open_count != 1) { rw_exit(&zv->zv_suspend_lock); @@ -1069,7 +1062,6 @@ zvol_cdev_close(struct cdev *dev, int flags, int fmt, struct thread *td) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -1101,7 +1093,8 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, int error; boolean_t sync; - zv = dev->si_drv2; + zv = atomic_load_ptr(&dev->si_drv2); + ASSERT3P(zv, !=, NULL); error = 0; KASSERT(zv->zv_open_count > 0, @@ -1162,6 +1155,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, *(off_t *)data = 0; break; case DIOCGATTR: { + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); spa_t *spa = dmu_objset_spa(zv->zv_objset); struct diocgattr_arg *arg = (struct diocgattr_arg *)data; uint64_t refd, avail, usedobjs, availobjs; @@ -1186,6 +1180,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, arg->value.off = refd / DEV_BSIZE; } else error = SET_ERROR(ENOIOCTL); + rw_exit(&zv->zv_suspend_lock); break; } case FIOSEEKHOLE: @@ -1196,10 +1191,12 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, hole = (cmd == FIOSEEKHOLE); noff = *off; + rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); zfs_rangelock_exit(lr); + rw_exit(&zv->zv_suspend_lock); *off = noff; break; } @@ -1400,42 +1397,65 @@ zvol_alloc(const char *name, uint64_t volsize, uint64_t volblocksize, * Remove minor node for the specified volume. 
*/ void -zvol_os_free(zvol_state_t *zv) +zvol_os_remove_minor(zvol_state_t *zv) { - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT0(zv->zv_open_count); + ASSERT0(atomic_read(&zv->zv_suspend_ref)); + ASSERT(zv->zv_flags & ZVOL_REMOVING); - ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); - - rw_destroy(&zv->zv_suspend_lock); - zfs_rangelock_fini(&zv->zv_rangelock); + struct zvol_state_os *zso = zv->zv_zso; + zv->zv_zso = NULL; if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { - struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; - struct g_provider *pp __maybe_unused = zsg->zsg_provider; - - ASSERT0P(pp->private); + struct zvol_state_geom *zsg = &zso->zso_geom; + struct g_provider *pp = zsg->zsg_provider; + atomic_store_ptr(&pp->private, NULL); + mutex_exit(&zv->zv_state_lock); g_topology_lock(); - zvol_geom_destroy(zv); + g_wither_geom(pp->geom, ENXIO); g_topology_unlock(); } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { - struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; + struct zvol_state_dev *zsd = &zso->zso_dev; struct cdev *dev = zsd->zsd_cdev; + if (dev != NULL) + atomic_store_ptr(&dev->si_drv2, NULL); + mutex_exit(&zv->zv_state_lock); + if (dev != NULL) { - ASSERT0P(dev->si_drv2); destroy_dev(dev); knlist_clear(&zsd->zsd_selinfo.si_note, 0); knlist_destroy(&zsd->zsd_selinfo.si_note); } } + kmem_free(zso, sizeof (struct zvol_state_os)); + + mutex_enter(&zv->zv_state_lock); +} + +void +zvol_os_free(zvol_state_t *zv) +{ + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT0(zv->zv_open_count); + ASSERT0P(zv->zv_zso); + + ASSERT0P(zv->zv_objset); + ASSERT0P(zv->zv_zilog); + ASSERT0P(zv->zv_dn); + + ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); + + rw_destroy(&zv->zv_suspend_lock); + zfs_rangelock_fini(&zv->zv_rangelock); + mutex_destroy(&zv->zv_state_lock); cv_destroy(&zv->zv_removing_cv); dataset_kstats_destroy(&zv->zv_kstat); - 
kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); zvol_minors--; } @@ -1538,28 +1558,6 @@ out_doi: return (error); } -void -zvol_os_clear_private(zvol_state_t *zv) -{ - ASSERT(RW_LOCK_HELD(&zvol_state_lock)); - if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { - struct zvol_state_geom *zsg = &zv->zv_zso->zso_geom; - struct g_provider *pp = zsg->zsg_provider; - - if (pp->private == NULL) /* already cleared */ - return; - - pp->private = NULL; - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { - struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev; - struct cdev *dev = zsd->zsd_cdev; - - if (dev != NULL) - dev->si_drv2 = NULL; - } -} - int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) { diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index a73acdad34ae..bac166fcd89e 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2024, Rob Norris <robn@despairlabs.com> - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ #include <sys/dataset_kstats.h> @@ -679,28 +679,19 @@ zvol_open(struct block_device *bdev, fmode_t flag) retry: #endif - rw_enter(&zvol_state_lock, RW_READER); - /* - * Obtain a copy of private_data under the zvol_state_lock to make - * sure that either the result of zvol free code path setting - * disk->private_data to NULL is observed, or zvol_os_free() - * is not called on this zv because of the positive zv_open_count. 
- */ + #ifdef HAVE_BLK_MODE_T - zv = disk->private_data; + zv = atomic_load_ptr(&disk->private_data); #else - zv = bdev->bd_disk->private_data; + zv = atomic_load_ptr(&bdev->bd_disk->private_data); #endif if (zv == NULL) { - rw_exit(&zvol_state_lock); return (-SET_ERROR(ENXIO)); } mutex_enter(&zv->zv_state_lock); - if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); return (-SET_ERROR(ENXIO)); } @@ -712,8 +703,28 @@ retry: if (zv->zv_open_count == 0) { if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { mutex_exit(&zv->zv_state_lock); + + /* + * Removal may happen while the locks are down, so + * we can't trust zv any longer; we have to start over. + */ +#ifdef HAVE_BLK_MODE_T + zv = atomic_load_ptr(&disk->private_data); +#else + zv = atomic_load_ptr(&bdev->bd_disk->private_data); +#endif + if (zv == NULL) + return (-SET_ERROR(ENXIO)); + rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); + + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { + mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); + return (-SET_ERROR(ENXIO)); + } + /* check to see if zv_suspend_lock is needed */ if (zv->zv_open_count != 0) { rw_exit(&zv->zv_suspend_lock); @@ -724,7 +735,6 @@ retry: drop_suspend = B_TRUE; } } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -821,11 +831,11 @@ zvol_release(struct gendisk *disk, fmode_t unused) #if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG) (void) unused; #endif - zvol_state_t *zv; boolean_t drop_suspend = B_TRUE; - rw_enter(&zvol_state_lock, RW_READER); - zv = disk->private_data; + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); + if (zv == NULL) + return; mutex_enter(&zv->zv_state_lock); ASSERT3U(zv->zv_open_count, >, 0); @@ -839,6 +849,15 @@ zvol_release(struct gendisk *disk, fmode_t unused) mutex_exit(&zv->zv_state_lock); rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); + + /* + * Unlike in 
zvol_open(), we don't check if removal + * started here, because we might be one of the openers + * that needs to be thrown out! If we're the last, we + * need to call zvol_last_close() below to finish + * cleanup. So, no special treatment for us. + */ + /* check to see if zv_suspend_lock is needed */ if (zv->zv_open_count != 1) { rw_exit(&zv->zv_suspend_lock); @@ -848,7 +867,6 @@ zvol_release(struct gendisk *disk, fmode_t unused) } else { drop_suspend = B_FALSE; } - rw_exit(&zvol_state_lock); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); @@ -868,9 +886,10 @@ static int zvol_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - zvol_state_t *zv = bdev->bd_disk->private_data; int error = 0; + zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); switch (cmd) { @@ -923,9 +942,8 @@ zvol_check_events(struct gendisk *disk, unsigned int clearing) { unsigned int mask = 0; - rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); - zvol_state_t *zv = disk->private_data; if (zv != NULL) { mutex_enter(&zv->zv_state_lock); mask = zv->zv_changed ? 
DISK_EVENT_MEDIA_CHANGE : 0; @@ -933,17 +951,14 @@ zvol_check_events(struct gendisk *disk, unsigned int clearing) mutex_exit(&zv->zv_state_lock); } - rw_exit(&zvol_state_lock); - return (mask); } static int zvol_revalidate_disk(struct gendisk *disk) { - rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = atomic_load_ptr(&disk->private_data); - zvol_state_t *zv = disk->private_data; if (zv != NULL) { mutex_enter(&zv->zv_state_lock); set_capacity(zv->zv_zso->zvo_disk, @@ -951,8 +966,6 @@ zvol_revalidate_disk(struct gendisk *disk) mutex_exit(&zv->zv_state_lock); } - rw_exit(&zvol_state_lock); - return (0); } @@ -971,16 +984,6 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize) return (0); } -void -zvol_os_clear_private(zvol_state_t *zv) -{ - /* - * Cleared while holding zvol_state_lock as a writer - * which will prevent zvol_open() from opening it. - */ - zv->zv_zso->zvo_disk->private_data = NULL; -} - /* * Provide a simple virtual geometry for legacy compatibility. For devices * smaller than 1 MiB a small head and sector count is used to allow very @@ -990,9 +993,10 @@ zvol_os_clear_private(zvol_state_t *zv) static int zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - zvol_state_t *zv = bdev->bd_disk->private_data; sector_t sectors; + zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data); + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); sectors = get_capacity(zv->zv_zso->zvo_disk); @@ -1417,53 +1421,70 @@ out_kmem: return (ret); } -/* - * Cleanup then free a zvol_state_t which was created by zvol_alloc(). - * At this time, the structure is not opened by anyone, is taken off - * the zvol_state_list, and has its private data set to NULL. - * The zvol_state_lock is dropped. - * - * This function may take many milliseconds to complete (e.g. we've seen - * it take over 256ms), due to the calls to "blk_cleanup_queue" and - * "del_gendisk". 
Thus, consumers need to be careful to account for this - * latency when calling this function. - */ void -zvol_os_free(zvol_state_t *zv) +zvol_os_remove_minor(zvol_state_t *zv) { - - ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT0(zv->zv_open_count); - ASSERT0P(zv->zv_zso->zvo_disk->private_data); + ASSERT0(atomic_read(&zv->zv_suspend_ref)); + ASSERT(zv->zv_flags & ZVOL_REMOVING); - rw_destroy(&zv->zv_suspend_lock); - zfs_rangelock_fini(&zv->zv_rangelock); + struct zvol_state_os *zso = zv->zv_zso; + zv->zv_zso = NULL; + + /* Clearing private_data will make new callers return immediately. */ + atomic_store_ptr(&zso->zvo_disk->private_data, NULL); + + /* + * Drop the state lock before calling del_gendisk(). There may be + * callers waiting to acquire it, but del_gendisk() will block until + * they exit, which would deadlock. + */ + mutex_exit(&zv->zv_state_lock); - del_gendisk(zv->zv_zso->zvo_disk); + del_gendisk(zso->zvo_disk); #if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \ (defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG)) #if defined(HAVE_BLK_CLEANUP_DISK) - blk_cleanup_disk(zv->zv_zso->zvo_disk); + blk_cleanup_disk(zso->zvo_disk); #else - put_disk(zv->zv_zso->zvo_disk); + put_disk(zso->zvo_disk); #endif #else - blk_cleanup_queue(zv->zv_zso->zvo_queue); - put_disk(zv->zv_zso->zvo_disk); + blk_cleanup_queue(zso->zvo_queue); + put_disk(zso->zvo_disk); #endif - if (zv->zv_zso->use_blk_mq) - blk_mq_free_tag_set(&zv->zv_zso->tag_set); + if (zso->use_blk_mq) + blk_mq_free_tag_set(&zso->tag_set); + + ida_simple_remove(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS); - ida_simple_remove(&zvol_ida, - MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); + kmem_free(zso, sizeof (struct zvol_state_os)); + + mutex_enter(&zv->zv_state_lock); +} + +void +zvol_os_free(zvol_state_t *zv) +{ + + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + 
ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT0(zv->zv_open_count); + ASSERT0P(zv->zv_zso); + + ASSERT0P(zv->zv_objset); + ASSERT0P(zv->zv_zilog); + ASSERT0P(zv->zv_dn); + + rw_destroy(&zv->zv_suspend_lock); + zfs_rangelock_fini(&zv->zv_rangelock); cv_destroy(&zv->zv_removing_cv); mutex_destroy(&zv->zv_state_lock); dataset_kstats_destroy(&zv->zv_kstat); - kmem_free(zv->zv_zso, sizeof (struct zvol_state_os)); kmem_free(zv, sizeof (zvol_state_t)); } diff --git a/sys/contrib/openzfs/module/zcommon/simd_stat.c b/sys/contrib/openzfs/module/zcommon/simd_stat.c index 11e2080ff9f2..007ae9e4fbbc 100644 --- a/sys/contrib/openzfs/module/zcommon/simd_stat.c +++ b/sys/contrib/openzfs/module/zcommon/simd_stat.c @@ -118,6 +118,10 @@ simd_stat_kstat_data(char *buf, size_t size, void *data) "pclmulqdq", zfs_pclmulqdq_available()); off += SIMD_STAT_PRINT(simd_stat_kstat_payload, "movbe", zfs_movbe_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "vaes", zfs_vaes_available()); + off += SIMD_STAT_PRINT(simd_stat_kstat_payload, + "vpclmulqdq", zfs_vpclmulqdq_available()); off += SIMD_STAT_PRINT(simd_stat_kstat_payload, "osxsave", boot_cpu_has(X86_FEATURE_OSXSAVE)); diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 3d0f88b36336..fccc4c5b5b94 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -2270,14 +2270,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) if (dn->dn_objset->os_dsl_dataset != NULL) rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG); #endif - /* - * We make this assert for private objects as well, but after we - * check if we're already dirty. They are allowed to re-dirty - * in syncing context. - */ - ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || - dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == - (dmu_tx_is_syncing(tx) ? 
DN_DIRTY_SYNC : DN_DIRTY_OPEN)); mutex_enter(&db->db_mtx); /* @@ -2289,12 +2281,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_state == DB_CACHED || db->db_state == DB_FILL || db->db_state == DB_NOFILL); - mutex_enter(&dn->dn_mtx); - dnode_set_dirtyctx(dn, tx, db); - if (tx->tx_txg > dn->dn_dirty_txg) - dn->dn_dirty_txg = tx->tx_txg; - mutex_exit(&dn->dn_mtx); - if (db->db_blkid == DMU_SPILL_BLKID) dn->dn_have_spill = B_TRUE; @@ -2313,13 +2299,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (dr_next); } - /* - * Only valid if not already dirty. - */ - ASSERT(dn->dn_object == 0 || - dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == - (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); - ASSERT3U(dn->dn_nlevels, >, db->db_level); /* @@ -2557,12 +2536,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) /* * Due to our use of dn_nlevels below, this can only be called - * in open context, unless we are operating on the MOS. - * From syncing context, dn_nlevels may be different from the - * dn_nlevels used when dbuf was dirtied. + * in open context, unless we are operating on the MOS or it's + * a special object. From syncing context, dn_nlevels may be + * different from the dn_nlevels used when dbuf was dirtied. 
*/ ASSERT(db->db_objset == dmu_objset_pool(db->db_objset)->dp_meta_objset || + DMU_OBJECT_IS_SPECIAL(db->db.db_object) || txg != spa_syncing_txg(dmu_objset_spa(db->db_objset))); ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT0(db->db_level); diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index a77f338bdfd3..8e6b569c2100 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -2037,6 +2037,8 @@ userquota_updates_task(void *arg) dn->dn_id_flags |= DN_ID_CHKED_BONUS; } dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); + ASSERT3U(dn->dn_dirtycnt, >, 0); + dn->dn_dirtycnt--; mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); @@ -2070,6 +2072,10 @@ dnode_rele_task(void *arg) dnode_t *dn; while ((dn = multilist_sublist_head(list)) != NULL) { + mutex_enter(&dn->dn_mtx); + ASSERT3U(dn->dn_dirtycnt, >, 0); + dn->dn_dirtycnt--; + mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); dnode_rele(dn, &os->os_synced_dnodes); } diff --git a/sys/contrib/openzfs/module/zfs/dnode.c b/sys/contrib/openzfs/module/zfs/dnode.c index 963ff41232a3..6c150d31c669 100644 --- a/sys/contrib/openzfs/module/zfs/dnode.c +++ b/sys/contrib/openzfs/module/zfs/dnode.c @@ -173,9 +173,7 @@ dnode_cons(void *arg, void *unused, int kmflag) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; + dn->dn_dirtycnt = 0; dn->dn_bonus = NULL; dn->dn_have_spill = B_FALSE; dn->dn_zio = NULL; @@ -229,9 +227,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); - ASSERT0(dn->dn_dirty_txg); - ASSERT0(dn->dn_dirtyctx); - ASSERT0P(dn->dn_dirtyctx_firstset); + ASSERT0(dn->dn_dirtycnt); ASSERT0P(dn->dn_bonus); ASSERT(!dn->dn_have_spill); ASSERT0P(dn->dn_zio); @@ -692,10 +688,8 @@ dnode_destroy(dnode_t *dn) dn->dn_allocated_txg = 0; 
dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; - dn->dn_dirtyctx = 0; - dn->dn_dirtyctx_firstset = NULL; if (dn->dn_bonus != NULL) { mutex_enter(&dn->dn_bonus->db_mtx); dbuf_destroy(dn->dn_bonus); @@ -800,11 +794,9 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dn->dn_bonuslen = bonuslen; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; - dn->dn_dirtyctx = 0; dn->dn_free_txg = 0; - dn->dn_dirtyctx_firstset = NULL; - dn->dn_dirty_txg = 0; + dn->dn_dirtycnt = 0; dn->dn_allocated_txg = tx->tx_txg; dn->dn_id_flags = 0; @@ -955,9 +947,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; - ndn->dn_dirty_txg = odn->dn_dirty_txg; - ndn->dn_dirtyctx = odn->dn_dirtyctx; - ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; + ndn->dn_dirtycnt = odn->dn_dirtycnt; ASSERT0(zfs_refcount_count(&odn->dn_tx_holds)); zfs_refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ASSERT(avl_is_empty(&ndn->dn_dbufs)); @@ -1020,9 +1010,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; - odn->dn_dirty_txg = 0; - odn->dn_dirtyctx = 0; - odn->dn_dirtyctx_firstset = NULL; + odn->dn_dirtycnt = 0; odn->dn_have_spill = B_FALSE; odn->dn_zio = NULL; odn->dn_oldused = 0; @@ -1273,8 +1261,8 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) } else if (DN_SLOT_IS_PTR(dn)) { mutex_enter(&dn->dn_mtx); boolean_t can_free = (dn->dn_type == DMU_OT_NONE && - zfs_refcount_is_zero(&dn->dn_holds) && - !DNODE_IS_DIRTY(dn)); + dn->dn_dirtycnt == 0 && + zfs_refcount_is_zero(&dn->dn_holds)); mutex_exit(&dn->dn_mtx); if (!can_free) @@ -1757,17 +1745,23 @@ dnode_hold(objset_t *os, uint64_t object, const void *tag, dnode_t **dnp) * reference on the dnode. 
Returns FALSE if unable to add a * new reference. */ +static boolean_t +dnode_add_ref_locked(dnode_t *dn, const void *tag) +{ + ASSERT(MUTEX_HELD(&dn->dn_mtx)); + if (zfs_refcount_is_zero(&dn->dn_holds)) + return (FALSE); + VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + return (TRUE); +} + boolean_t dnode_add_ref(dnode_t *dn, const void *tag) { mutex_enter(&dn->dn_mtx); - if (zfs_refcount_is_zero(&dn->dn_holds)) { - mutex_exit(&dn->dn_mtx); - return (FALSE); - } - VERIFY(1 < zfs_refcount_add(&dn->dn_holds, tag)); + boolean_t r = dnode_add_ref_locked(dn, tag); mutex_exit(&dn->dn_mtx); - return (TRUE); + return (r); } void @@ -1830,31 +1824,20 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) } /* - * Checks if the dnode itself is dirty, or is carrying any uncommitted records. - * It is important to check both conditions, as some operations (eg appending - * to a file) can dirty both as a single logical unit, but they are not synced - * out atomically, so checking one and not the other can result in an object - * appearing to be clean mid-way through a commit. + * Test if the dnode is dirty, or carrying uncommitted records. * - * Do not change this lightly! If you get it wrong, dmu_offset_next() can - * detect a hole where there is really data, leading to silent corruption. + * dn_dirtycnt is the number of txgs this dnode is dirty on. It's incremented + * in dnode_setdirty() the first time the dnode is dirtied on a txg, and + * decremented in either dnode_rele_task() or userquota_updates_task() when the + * txg is synced out. 
*/ boolean_t dnode_is_dirty(dnode_t *dn) { mutex_enter(&dn->dn_mtx); - - for (int i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i]) || - !list_is_empty(&dn->dn_dirty_records[i])) { - mutex_exit(&dn->dn_mtx); - return (B_TRUE); - } - } - + boolean_t dirty = (dn->dn_dirtycnt != 0); mutex_exit(&dn->dn_mtx); - - return (B_FALSE); + return (dirty); } void @@ -1916,7 +1899,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) * dnode will hang around after we finish processing its * children. */ - VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); + mutex_enter(&dn->dn_mtx); + VERIFY(dnode_add_ref_locked(dn, (void *)(uintptr_t)tx->tx_txg)); + dn->dn_dirtycnt++; + ASSERT3U(dn->dn_dirtycnt, <=, 3); + mutex_exit(&dn->dn_mtx); (void) dbuf_dirty(dn->dn_dbuf, tx); @@ -2221,32 +2208,6 @@ dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, mutex_exit(&dn->dn_dbufs_mtx); } -void -dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag) -{ - /* - * Don't set dirtyctx to SYNC if we're just modifying this as we - * initialize the objset. 
- */ - if (dn->dn_dirtyctx == DN_UNDIRTIED) { - dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; - - if (ds != NULL) { - rrw_enter(&ds->ds_bp_rwlock, RW_READER, tag); - } - if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) { - if (dmu_tx_is_syncing(tx)) - dn->dn_dirtyctx = DN_DIRTY_SYNC; - else - dn->dn_dirtyctx = DN_DIRTY_OPEN; - dn->dn_dirtyctx_firstset = tag; - } - if (ds != NULL) { - rrw_exit(&ds->ds_bp_rwlock, tag); - } - } -} - static void dnode_partial_zero(dnode_t *dn, uint64_t off, uint64_t blkoff, uint64_t len, dmu_tx_t *tx) diff --git a/sys/contrib/openzfs/module/zfs/multilist.c b/sys/contrib/openzfs/module/zfs/multilist.c index 7b85d19e19ee..46fb79269310 100644 --- a/sys/contrib/openzfs/module/zfs/multilist.c +++ b/sys/contrib/openzfs/module/zfs/multilist.c @@ -81,7 +81,7 @@ multilist_create_impl(multilist_t *ml, size_t size, size_t offset, ml->ml_num_sublists = num; ml->ml_index_func = index_func; - ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * + ml->ml_sublists = vmem_zalloc(sizeof (multilist_sublist_t) * ml->ml_num_sublists, KM_SLEEP); ASSERT3P(ml->ml_sublists, !=, NULL); @@ -134,7 +134,7 @@ multilist_destroy(multilist_t *ml) } ASSERT3P(ml->ml_sublists, !=, NULL); - kmem_free(ml->ml_sublists, + vmem_free(ml->ml_sublists, sizeof (multilist_sublist_t) * ml->ml_num_sublists); ml->ml_num_sublists = 0; diff --git a/sys/contrib/openzfs/module/zfs/spa_config.c b/sys/contrib/openzfs/module/zfs/spa_config.c index 7d4d06659146..cf28955b0c50 100644 --- a/sys/contrib/openzfs/module/zfs/spa_config.c +++ b/sys/contrib/openzfs/module/zfs/spa_config.c @@ -48,18 +48,17 @@ /* * Pool configuration repository. * - * Pool configuration is stored as a packed nvlist on the filesystem. By - * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot - * (when the ZFS module is loaded). Pools can also have the 'cachefile' - * property set that allows them to be stored in an alternate location until - * the control of external software. 
+ * Pool configuration is stored as a packed nvlist on the filesystem. When + pools are imported they are added to the /etc/zfs/zpool.cache file and + removed from it when exported. For each cache file, we have a single nvlist + which holds all the configuration information. Pools can also have the + 'cachefile' property set which allows this config to be stored in an + alternate location under the control of external software. * - * For each cache file, we have a single nvlist which holds all the - * configuration information. When the module loads, we read this information - * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is - * maintained independently in spa.c. Whenever the namespace is modified, or - * the configuration of a pool is changed, we call spa_write_cachefile(), which - * walks through all the active pools and writes the configuration to disk. + * The kernel independently maintains an AVL tree of imported pools. See the + * "SPA locking" comment in spa.c. Whenever a pool configuration is modified + * we call spa_write_cachefile() which walks through all the active pools and + * writes the updated configuration to the /etc/zfs/zpool.cache file. */ static uint64_t spa_config_generation = 1; @@ -69,94 +68,6 @@ static uint64_t spa_config_generation = 1; * userland pools when doing testing. */ char *spa_config_path = (char *)ZPOOL_CACHE; -#ifdef _KERNEL -static int zfs_autoimport_disable = B_TRUE; -#endif - -/* - * Called when the module is first loaded, this routine loads the configuration - * file into the SPA namespace. It does not actually open or load the pools; it - * only populates the namespace. - */ -void -spa_config_load(void) -{ - void *buf = NULL; - nvlist_t *nvlist, *child; - nvpair_t *nvpair; - char *pathname; - zfs_file_t *fp; - zfs_file_attr_t zfa; - uint64_t fsize; - int err; - -#ifdef _KERNEL - if (zfs_autoimport_disable) - return; -#endif - - /* - * Open the configuration file. 
- */ - pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); - - err = zfs_file_open(pathname, O_RDONLY, 0, &fp); - -#ifdef __FreeBSD__ - if (err) - err = zfs_file_open(ZPOOL_CACHE_BOOT, O_RDONLY, 0, &fp); -#endif - kmem_free(pathname, MAXPATHLEN); - - if (err) - return; - - if (zfs_file_getattr(fp, &zfa)) - goto out; - - fsize = zfa.zfa_size; - buf = kmem_alloc(fsize, KM_SLEEP); - - /* - * Read the nvlist from the file. - */ - if (zfs_file_read(fp, buf, fsize, NULL) < 0) - goto out; - - /* - * Unpack the nvlist. - */ - if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) - goto out; - - /* - * Iterate over all elements in the nvlist, creating a new spa_t for - * each one with the specified configuration. - */ - mutex_enter(&spa_namespace_lock); - nvpair = NULL; - while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { - if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) - continue; - - child = fnvpair_value_nvlist(nvpair); - - if (spa_lookup(nvpair_name(nvpair)) != NULL) - continue; - (void) spa_add(nvpair_name(nvpair), child, NULL); - } - mutex_exit(&spa_namespace_lock); - - nvlist_free(nvlist); - -out: - if (buf != NULL) - kmem_free(buf, fsize); - - zfs_file_close(fp); -} static int spa_config_remove(spa_config_dirent_t *dp) @@ -623,7 +534,6 @@ spa_config_update(spa_t *spa, int what) spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); } -EXPORT_SYMBOL(spa_config_load); EXPORT_SYMBOL(spa_all_configs); EXPORT_SYMBOL(spa_config_set); EXPORT_SYMBOL(spa_config_generate); @@ -634,8 +544,3 @@ EXPORT_SYMBOL(spa_config_update); ZFS_MODULE_PARAM(zfs_spa, spa_, config_path, STRING, ZMOD_RD, "SPA config file (/etc/zfs/zpool.cache)"); #endif - -#ifdef _KERNEL -ZFS_MODULE_PARAM(zfs, zfs_, autoimport_disable, INT, ZMOD_RW, - "Disable pool import at module load"); -#endif diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index cce772eae598..dceafbc27556 100644 --- 
a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -2548,13 +2548,6 @@ spa_name_compare(const void *a1, const void *a2) } void -spa_boot_init(void *unused) -{ - (void) unused; - spa_config_load(); -} - -void spa_init(spa_mode_t mode) { mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); @@ -2607,7 +2600,6 @@ spa_init(spa_mode_t mode) chksum_init(); zpool_prop_init(); zpool_feature_init(); - spa_config_load(); vdev_prop_init(); l2arc_start(); scan_init(); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index 31b59c55f17b..0307df55aa21 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -819,34 +819,37 @@ zil_lwb_vdev_compare(const void *x1, const void *x2) * we choose them here and later make the block allocation match. */ static lwb_t * -zil_alloc_lwb(zilog_t *zilog, int sz, blkptr_t *bp, boolean_t slog, - uint64_t txg, lwb_state_t state) +zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, int min_sz, int sz, + boolean_t slog, uint64_t txg) { lwb_t *lwb; lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); + lwb->lwb_flags = 0; lwb->lwb_zilog = zilog; if (bp) { lwb->lwb_blk = *bp; - lwb->lwb_slim = (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2); + if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) + lwb->lwb_flags |= LWB_FLAG_SLIM; sz = BP_GET_LSIZE(bp); + lwb->lwb_min_sz = sz; } else { BP_ZERO(&lwb->lwb_blk); - lwb->lwb_slim = (spa_version(zilog->zl_spa) >= - SPA_VERSION_SLIM_ZIL); + if (spa_version(zilog->zl_spa) >= SPA_VERSION_SLIM_ZIL) + lwb->lwb_flags |= LWB_FLAG_SLIM; + lwb->lwb_min_sz = min_sz; } - lwb->lwb_slog = slog; + if (slog) + lwb->lwb_flags |= LWB_FLAG_SLOG; lwb->lwb_error = 0; - if (lwb->lwb_slim) { - lwb->lwb_nmax = sz; - lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t); - } else { - lwb->lwb_nmax = sz - sizeof (zil_chain_t); - lwb->lwb_nused = lwb->lwb_nfilled = 0; - } + /* + * Buffer allocation and capacity setup will 
be done in + * zil_lwb_write_open() when the LWB is opened for ITX assignment. + */ + lwb->lwb_nmax = lwb->lwb_nused = lwb->lwb_nfilled = 0; lwb->lwb_sz = sz; - lwb->lwb_state = state; - lwb->lwb_buf = zio_buf_alloc(sz); + lwb->lwb_buf = NULL; + lwb->lwb_state = LWB_STATE_NEW; lwb->lwb_child_zio = NULL; lwb->lwb_write_zio = NULL; lwb->lwb_root_zio = NULL; @@ -857,8 +860,6 @@ zil_alloc_lwb(zilog_t *zilog, int sz, blkptr_t *bp, boolean_t slog, mutex_enter(&zilog->zl_lock); list_insert_tail(&zilog->zl_lwb_list, lwb); - if (state != LWB_STATE_NEW) - zilog->zl_last_lwb_opened = lwb; mutex_exit(&zilog->zl_lock); return (lwb); @@ -878,7 +879,7 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb) VERIFY(list_is_empty(&lwb->lwb_itxs)); VERIFY(list_is_empty(&lwb->lwb_waiters)); ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); - ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); + ASSERT(!MUTEX_HELD(&lwb->lwb_lock)); /* * Clear the zilog's field to indicate this lwb is no longer @@ -1019,7 +1020,7 @@ zil_create(zilog_t *zilog) } error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, - ZIL_MIN_BLKSZ, &slog); + ZIL_MIN_BLKSZ, ZIL_MIN_BLKSZ, &slog, B_TRUE); if (error == 0) zil_init_log_chain(zilog, &blk); } @@ -1028,7 +1029,7 @@ zil_create(zilog_t *zilog) * Allocate a log write block (lwb) for the first log block. */ if (error == 0) - lwb = zil_alloc_lwb(zilog, 0, &blk, slog, txg, LWB_STATE_NEW); + lwb = zil_alloc_lwb(zilog, &blk, 0, 0, slog, txg); /* * If we just allocated the first log block, commit our transaction @@ -1324,10 +1325,12 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) * zil_commit() is racing with spa_sync(). 
*/ static void -zil_commit_waiter_skip(zil_commit_waiter_t *zcw) +zil_commit_waiter_done(zil_commit_waiter_t *zcw, int err) { mutex_enter(&zcw->zcw_lock); ASSERT3B(zcw->zcw_done, ==, B_FALSE); + zcw->zcw_lwb = NULL; + zcw->zcw_error = err; zcw->zcw_done = B_TRUE; cv_broadcast(&zcw->zcw_cv); mutex_exit(&zcw->zcw_lock); @@ -1389,7 +1392,7 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) if (zil_nocacheflush) return; - mutex_enter(&lwb->lwb_vdev_lock); + mutex_enter(&lwb->lwb_lock); for (i = 0; i < ndvas; i++) { zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); if (avl_find(t, &zvsearch, &where) == NULL) { @@ -1398,7 +1401,7 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp) avl_insert(t, zv, where); } } - mutex_exit(&lwb->lwb_vdev_lock); + mutex_exit(&lwb->lwb_lock); } static void @@ -1415,12 +1418,12 @@ zil_lwb_flush_defer(lwb_t *lwb, lwb_t *nlwb) /* * While 'lwb' is at a point in its lifetime where lwb_vdev_tree does - * not need the protection of lwb_vdev_lock (it will only be modified + * not need the protection of lwb_lock (it will only be modified * while holding zilog->zl_lock) as its writes and those of its * children have all completed. The younger 'nlwb' may be waiting on * future writes to additional vdevs. */ - mutex_enter(&nlwb->lwb_vdev_lock); + mutex_enter(&nlwb->lwb_lock); /* * Tear down the 'lwb' vdev tree, ensuring that entries which do not * exist in 'nlwb' are moved to it, freeing any would-be duplicates. 
@@ -1434,7 +1437,7 @@ zil_lwb_flush_defer(lwb_t *lwb, lwb_t *nlwb) kmem_free(zv, sizeof (*zv)); } } - mutex_exit(&nlwb->lwb_vdev_lock); + mutex_exit(&nlwb->lwb_lock); } void @@ -1491,10 +1494,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zil_itx_destroy(itx, 0); while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { - mutex_enter(&zcw->zcw_lock); - - ASSERT3P(zcw->zcw_lwb, ==, lwb); - zcw->zcw_lwb = NULL; /* * We expect any ZIO errors from child ZIOs to have been * propagated "up" to this specific LWB's root ZIO, in @@ -1509,14 +1508,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio) * errors not being handled correctly here. See the * comment above the call to "zio_flush" for details. */ - - zcw->zcw_zio_error = zio->io_error; - - ASSERT3B(zcw->zcw_done, ==, B_FALSE); - zcw->zcw_done = B_TRUE; - cv_broadcast(&zcw->zcw_cv); - - mutex_exit(&zcw->zcw_lock); + zil_commit_waiter_done(zcw, zio->io_error); } uint64_t txg = lwb->lwb_issued_txg; @@ -1588,7 +1580,7 @@ zil_lwb_write_done(zio_t *zio) avl_tree_t *t = &lwb->lwb_vdev_tree; void *cookie = NULL; zil_vdev_node_t *zv; - lwb_t *nlwb; + lwb_t *nlwb = NULL; ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0); @@ -1608,9 +1600,11 @@ zil_lwb_write_done(zio_t *zio) * its write ZIO a parent this ZIO. In such case we can not defer * our flushes or below may be a race between the done callbacks. */ - nlwb = list_next(&zilog->zl_lwb_list, lwb); - if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED) - nlwb = NULL; + if (!(lwb->lwb_flags & LWB_FLAG_CRASHED)) { + nlwb = list_next(&zilog->zl_lwb_list, lwb); + if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED) + nlwb = NULL; + } mutex_exit(&zilog->zl_lock); if (avl_numnodes(t) == 0) @@ -1624,12 +1618,17 @@ zil_lwb_write_done(zio_t *zio) * written out. * * Additionally, we don't perform any further error handling at - * this point (e.g. 
setting "zcw_zio_error" appropriately), as - * we expect that to occur in "zil_lwb_flush_vdevs_done" (thus, - * we expect any error seen here, to have been propagated to - * that function). + * this point (e.g. setting "zcw_error" appropriately), as we + * expect that to occur in "zil_lwb_flush_vdevs_done" (thus, we + * expect any error seen here, to have been propagated to that + * function). + * + * Note that we treat a "crashed" LWB as though it was in error, + * even if it did appear to succeed, because we've already + * signaled error and cleaned up waiters and committers in + * zil_crash(); we just want to clean up and get out of here. */ - if (zio->io_error != 0) { + if (zio->io_error != 0 || (lwb->lwb_flags & LWB_FLAG_CRASHED)) { while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) kmem_free(zv, sizeof (*zv)); return; @@ -1742,10 +1741,26 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) return; } + mutex_enter(&lwb->lwb_lock); mutex_enter(&zilog->zl_lock); lwb->lwb_state = LWB_STATE_OPENED; zilog->zl_last_lwb_opened = lwb; mutex_exit(&zilog->zl_lock); + mutex_exit(&lwb->lwb_lock); + + /* + * Allocate buffer and set up LWB capacities. + */ + ASSERT0P(lwb->lwb_buf); + ASSERT3U(lwb->lwb_sz, >, 0); + lwb->lwb_buf = zio_buf_alloc(lwb->lwb_sz); + if (lwb->lwb_flags & LWB_FLAG_SLIM) { + lwb->lwb_nmax = lwb->lwb_sz; + lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t); + } else { + lwb->lwb_nmax = lwb->lwb_sz - sizeof (zil_chain_t); + lwb->lwb_nused = lwb->lwb_nfilled = 0; + } } /* @@ -1762,6 +1777,8 @@ static uint_t zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize) { uint_t md = zilog->zl_max_block_size - sizeof (zil_chain_t); + uint_t waste = zil_max_waste_space(zilog); + waste = MAX(waste, zilog->zl_cur_max); if (size <= md) { /* @@ -1772,9 +1789,10 @@ zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize) } else if (size > 8 * md) { /* * Big bursts use maximum blocks. 
The first block size - * is hard to predict, but it does not really matter. + * is hard to predict, but we need at least enough space + * to make reasonable progress. */ - *minsize = 0; + *minsize = waste; return (md); } @@ -1787,57 +1805,52 @@ zil_lwb_plan(zilog_t *zilog, uint64_t size, uint_t *minsize) uint_t s = size; uint_t n = DIV_ROUND_UP(s, md - sizeof (lr_write_t)); uint_t chunk = DIV_ROUND_UP(s, n); - uint_t waste = zil_max_waste_space(zilog); - waste = MAX(waste, zilog->zl_cur_max); if (chunk <= md - waste) { *minsize = MAX(s - (md - waste) * (n - 1), waste); return (chunk); } else { - *minsize = 0; + *minsize = waste; return (md); } } /* * Try to predict next block size based on previous history. Make prediction - * sufficient for 7 of 8 previous bursts. Don't try to save if the saving is - * less then 50%, extra writes may cost more, but we don't want single spike - * to badly affect our predictions. + * sufficient for 7 of 8 previous bursts, but don't try to save if the saving + * is less then 50%. Extra writes may cost more, but we don't want single + * spike to badly affect our predictions. */ -static uint_t -zil_lwb_predict(zilog_t *zilog) +static void +zil_lwb_predict(zilog_t *zilog, uint64_t *min_predict, uint64_t *max_predict) { - uint_t m, o; + uint_t m1 = 0, m2 = 0, o; - /* If we are in the middle of a burst, take it into account also. */ - if (zilog->zl_cur_size > 0) { - o = zil_lwb_plan(zilog, zilog->zl_cur_size, &m); - } else { + /* If we are in the middle of a burst, take it as another data point. */ + if (zilog->zl_cur_size > 0) + o = zil_lwb_plan(zilog, zilog->zl_cur_size, &m1); + else o = UINT_MAX; - m = 0; - } - /* Find minimum optimal size. We don't need to go below that. */ - for (int i = 0; i < ZIL_BURSTS; i++) - o = MIN(o, zilog->zl_prev_opt[i]); - - /* Find two biggest minimal first block sizes above the optimal. */ - uint_t m1 = MAX(m, o), m2 = o; + /* Find two largest minimal first block sizes. 
*/ for (int i = 0; i < ZIL_BURSTS; i++) { - m = zilog->zl_prev_min[i]; - if (m >= m1) { + uint_t cur = zilog->zl_prev_min[i]; + if (cur >= m1) { m2 = m1; - m1 = m; - } else if (m > m2) { - m2 = m; + m1 = cur; + } else if (cur > m2) { + m2 = cur; } } - /* - * If second minimum size gives 50% saving -- use it. It may cost us - * one additional write later, but the space saving is just too big. - */ - return ((m1 < m2 * 2) ? m1 : m2); + /* Minimum should guarantee progress in most cases. */ + *min_predict = (m1 < m2 * 2) ? m1 : m2; + + /* Maximum doesn't need to go below the minimum optimal size. */ + for (int i = 0; i < ZIL_BURSTS; i++) + o = MIN(o, zilog->zl_prev_opt[i]); + m1 = MAX(m1, o); + m2 = MAX(m2, o); + *max_predict = (m1 < m2 * 2) ? m1 : m2; } /* @@ -1845,12 +1858,13 @@ zil_lwb_predict(zilog_t *zilog) * Has to be called under zl_issuer_lock to chain more lwbs. */ static lwb_t * -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state) +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) { - uint64_t blksz, plan, plan2; + uint64_t minbs, maxbs; ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED); + membar_producer(); lwb->lwb_state = LWB_STATE_CLOSED; /* @@ -1875,27 +1889,34 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state) * Try to predict what can it be and plan for the worst case. */ uint_t m; - plan = zil_lwb_plan(zilog, zilog->zl_cur_left, &m); + maxbs = zil_lwb_plan(zilog, zilog->zl_cur_left, &m); + minbs = m; if (zilog->zl_parallel) { - plan2 = zil_lwb_plan(zilog, zilog->zl_cur_left + - zil_lwb_predict(zilog), &m); - if (plan < plan2) - plan = plan2; + uint64_t minp, maxp; + zil_lwb_predict(zilog, &minp, &maxp); + maxp = zil_lwb_plan(zilog, zilog->zl_cur_left + maxp, + &m); + if (maxbs < maxp) + maxbs = maxp; } } else { /* * The previous burst is done and we can only predict what * will come next. 
*/ - plan = zil_lwb_predict(zilog); + zil_lwb_predict(zilog, &minbs, &maxbs); } - blksz = plan + sizeof (zil_chain_t); - blksz = P2ROUNDUP_TYPED(blksz, ZIL_MIN_BLKSZ, uint64_t); - blksz = MIN(blksz, zilog->zl_max_block_size); - DTRACE_PROBE3(zil__block__size, zilog_t *, zilog, uint64_t, blksz, - uint64_t, plan); - return (zil_alloc_lwb(zilog, blksz, NULL, 0, 0, state)); + minbs += sizeof (zil_chain_t); + maxbs += sizeof (zil_chain_t); + minbs = P2ROUNDUP_TYPED(minbs, ZIL_MIN_BLKSZ, uint64_t); + maxbs = P2ROUNDUP_TYPED(maxbs, ZIL_MIN_BLKSZ, uint64_t); + maxbs = MIN(maxbs, zilog->zl_max_block_size); + minbs = MIN(minbs, maxbs); + DTRACE_PROBE3(zil__block__size, zilog_t *, zilog, uint64_t, minbs, + uint64_t, maxbs); + + return (zil_alloc_lwb(zilog, NULL, minbs, maxbs, 0, 0)); } /* @@ -1944,14 +1965,16 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) mutex_exit(&zilog->zl_lock); next_lwb: - if (lwb->lwb_slim) + if (lwb->lwb_flags & LWB_FLAG_SLIM) zilc = (zil_chain_t *)lwb->lwb_buf; else zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_nmax); - int wsz = lwb->lwb_sz; + uint64_t alloc_size = BP_GET_LSIZE(&lwb->lwb_blk); + int wsz = alloc_size; if (lwb->lwb_error == 0) { abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, lwb->lwb_sz); - if (!lwb->lwb_slog || zilog->zl_cur_size <= zil_slog_bulk) + if (!(lwb->lwb_flags & LWB_FLAG_SLOG) || + zilog->zl_cur_size <= zil_slog_bulk) prio = ZIO_PRIORITY_SYNC_WRITE; else prio = ZIO_PRIORITY_ASYNC_WRITE; @@ -1959,16 +1982,17 @@ next_lwb: ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, spa, 0, - &lwb->lwb_blk, lwb_abd, lwb->lwb_sz, zil_lwb_write_done, + &lwb->lwb_blk, lwb_abd, alloc_size, zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb); zil_lwb_add_block(lwb, &lwb->lwb_blk); - if (lwb->lwb_slim) { + if (lwb->lwb_flags & LWB_FLAG_SLIM) { /* For Slim ZIL only write what is used. 
*/ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int); - ASSERT3S(wsz, <=, lwb->lwb_sz); - zio_shrink(lwb->lwb_write_zio, wsz); + ASSERT3S(wsz, <=, alloc_size); + if (wsz < alloc_size) + zio_shrink(lwb->lwb_write_zio, wsz); wsz = lwb->lwb_write_zio->io_size; } memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused); @@ -2004,13 +2028,53 @@ next_lwb: BP_ZERO(bp); error = lwb->lwb_error; if (error == 0) { - error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, nlwb->lwb_sz, - &slog); + /* + * Allocation flexibility depends on LWB state: + * if NEW: allow range allocation and larger sizes; + * if OPENED: use fixed predetermined allocation size; + * if CLOSED + Slim: allocate precisely for actual usage. + */ + boolean_t flexible = (nlwb->lwb_state == LWB_STATE_NEW); + if (flexible) { + /* We need to prevent opening till we update lwb_sz. */ + mutex_enter(&nlwb->lwb_lock); + flexible = (nlwb->lwb_state == LWB_STATE_NEW); + if (!flexible) + mutex_exit(&nlwb->lwb_lock); /* We lost. */ + } + boolean_t closed_slim = (nlwb->lwb_state == LWB_STATE_CLOSED && + (lwb->lwb_flags & LWB_FLAG_SLIM)); + + uint64_t min_size, max_size; + if (closed_slim) { + /* This transition is racy, but only one way. */ + membar_consumer(); + min_size = max_size = P2ROUNDUP_TYPED(nlwb->lwb_nused, + ZIL_MIN_BLKSZ, uint64_t); + } else if (flexible) { + min_size = nlwb->lwb_min_sz; + max_size = nlwb->lwb_sz; + } else { + min_size = max_size = nlwb->lwb_sz; + } + + error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, + min_size, max_size, &slog, flexible); + if (error == 0) { + if (closed_slim) + ASSERT3U(BP_GET_LSIZE(bp), ==, max_size); + else if (flexible) + nlwb->lwb_sz = BP_GET_LSIZE(bp); + else + ASSERT3U(BP_GET_LSIZE(bp), ==, nlwb->lwb_sz); + } + if (flexible) + mutex_exit(&nlwb->lwb_lock); } if (error == 0) { ASSERT3U(BP_GET_BIRTH(bp), ==, txg); - BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 : - ZIO_CHECKSUM_ZILOG); + BP_SET_CHECKSUM(bp, (nlwb->lwb_flags & LWB_FLAG_SLIM) ? 
+ ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; } @@ -2039,14 +2103,15 @@ next_lwb: if (nlwb) { nlwb->lwb_blk = *bp; nlwb->lwb_error = error; - nlwb->lwb_slog = slog; + if (slog) + nlwb->lwb_flags |= LWB_FLAG_SLOG; nlwb->lwb_alloc_txg = txg; if (nlwb->lwb_state != LWB_STATE_READY) nlwb = NULL; } mutex_exit(&zilog->zl_lock); - if (lwb->lwb_slog) { + if (lwb->lwb_flags & LWB_FLAG_SLOG) { ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count); ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes, lwb->lwb_nused); @@ -2220,7 +2285,6 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs) ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); ASSERT3P(lwb, !=, NULL); - ASSERT3P(lwb->lwb_buf, !=, NULL); zil_lwb_write_open(zilog, lwb); @@ -2262,9 +2326,10 @@ cont: (dlen % max_log_data == 0 || lwb_sp < reclen + dlen % max_log_data))) { list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_OPENED); + lwb = zil_lwb_write_close(zilog, lwb); if (lwb == NULL) return (NULL); + zil_lwb_write_open(zilog, lwb); lwb_sp = lwb->lwb_nmax - lwb->lwb_nused; } @@ -2554,7 +2619,7 @@ zil_itxg_clean(void *arg) * called) we will hit this case. */ if (itx->itx_lr.lrc_txtype == TX_COMMIT) - zil_commit_waiter_skip(itx->itx_private); + zil_commit_waiter_done(itx->itx_private, 0); zil_itx_destroy(itx, 0); } @@ -2742,6 +2807,7 @@ zil_crash_clean(zilog_t *zilog, uint64_t synced_txg) } /* This LWB is from the past, so we can clean it up now. */ + ASSERT(lwb->lwb_flags & LWB_FLAG_CRASHED); list_remove(&zilog->zl_lwb_crash_list, lwb); if (lwb->lwb_buf != NULL) zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); @@ -2981,7 +3047,7 @@ zil_prune_commit_list(zilog_t *zilog) * never any itx's for it to wait on), so it's * safe to skip this waiter and mark it done. 
*/ - zil_commit_waiter_skip(itx->itx_private); + zil_commit_waiter_done(itx->itx_private, 0); } else { zil_commit_waiter_link_lwb(itx->itx_private, last_lwb); } @@ -3212,15 +3278,21 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) * "next" lwb on-disk. When this happens, we must stall * the ZIL write pipeline; see the comment within * zil_commit_writer_stall() for more details. + * + * ESHUTDOWN has to be handled carefully here. If we get it, + * then the pool suspended and zil_crash() was called, so we + * need to stop trying and just get an error back to the + * callers. */ int err = 0; while ((lwb = list_remove_head(ilwbs)) != NULL) { - err = zil_lwb_write_issue(zilog, lwb); - if (err != 0) - break; + if (err == 0) + err = zil_lwb_write_issue(zilog, lwb); } - if (err == 0) + if (err != ESHUTDOWN) err = zil_commit_writer_stall(zilog); + if (err == ESHUTDOWN) + err = SET_ERROR(EIO); /* * Additionally, we have to signal and mark the "nolwb" @@ -3230,7 +3302,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) */ zil_commit_waiter_t *zcw; while ((zcw = list_remove_head(&nolwb_waiters)) != NULL) - zil_commit_waiter_skip(zcw); + zil_commit_waiter_done(zcw, err); /* * And finally, we have to destroy the itx's that @@ -3238,7 +3310,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) * the itx's callback if one exists for the itx. 
*/ while ((itx = list_remove_head(&nolwb_itxs)) != NULL) - zil_itx_destroy(itx, 0); + zil_itx_destroy(itx, err); } else { ASSERT(list_is_empty(&nolwb_waiters)); ASSERT3P(lwb, !=, NULL); @@ -3292,17 +3364,17 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) (!zilog->zl_parallel || zilog->zl_suspend > 0)) { zil_burst_done(zilog); list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW); + lwb = zil_lwb_write_close(zilog, lwb); if (lwb == NULL) { int err = 0; while ((lwb = list_remove_head(ilwbs)) != NULL) { - err = zil_lwb_write_issue(zilog, lwb); - if (err != 0) - break; + if (err == 0) + err = zil_lwb_write_issue( + zilog, lwb); } - if (err == 0) - zil_commit_writer_stall(zilog); + if (err != ESHUTDOWN) + (void) zil_commit_writer_stall(zilog); } } } @@ -3470,7 +3542,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw) * hasn't been issued. */ zil_burst_done(zilog); - lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW); + lwb_t *nlwb = zil_lwb_write_close(zilog, lwb); ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED); @@ -3546,7 +3618,7 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw) * commit itxs. When this occurs, the commit waiters linked * off of these commit itxs will not be committed to an * lwb. Additionally, these commit waiters will not be - * marked done until zil_commit_waiter_skip() is called via + * marked done until zil_commit_waiter_done() is called via * zil_itxg_clean(). * * Thus, it's possible for this commit waiter (i.e. 
the @@ -3624,7 +3696,7 @@ zil_alloc_commit_waiter(void) list_link_init(&zcw->zcw_node); zcw->zcw_lwb = NULL; zcw->zcw_done = B_FALSE; - zcw->zcw_zio_error = 0; + zcw->zcw_error = 0; return (zcw); } @@ -3728,6 +3800,9 @@ zil_crash(zilog_t *zilog) */ for (lwb_t *lwb = list_head(&zilog->zl_lwb_crash_list); lwb != NULL; lwb = list_next(&zilog->zl_lwb_crash_list, lwb)) { + ASSERT(!(lwb->lwb_flags & LWB_FLAG_CRASHED)); + lwb->lwb_flags |= LWB_FLAG_CRASHED; + itx_t *itx; while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) zil_itx_destroy(itx, EIO); @@ -3736,7 +3811,7 @@ zil_crash(zilog_t *zilog) while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { mutex_enter(&zcw->zcw_lock); zcw->zcw_lwb = NULL; - zcw->zcw_zio_error = EIO; + zcw->zcw_error = EIO; zcw->zcw_done = B_TRUE; cv_broadcast(&zcw->zcw_cv); mutex_exit(&zcw->zcw_lock); @@ -4014,7 +4089,7 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) zil_commit_waiter(zilog, zcw); int err = 0; - if (zcw->zcw_zio_error != 0) { + if (zcw->zcw_error != 0) { /* * If there was an error writing out the ZIL blocks that * this thread is waiting on, then we fallback to @@ -4149,7 +4224,7 @@ zil_lwb_cons(void *vbuf, void *unused, int kmflag) offsetof(zil_commit_waiter_t, zcw_node)); avl_create(&lwb->lwb_vdev_tree, zil_lwb_vdev_compare, sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); - mutex_init(&lwb->lwb_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&lwb->lwb_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } @@ -4158,7 +4233,7 @@ zil_lwb_dest(void *vbuf, void *unused) { (void) unused; lwb_t *lwb = vbuf; - mutex_destroy(&lwb->lwb_vdev_lock); + mutex_destroy(&lwb->lwb_lock); avl_destroy(&lwb->lwb_vdev_tree); list_destroy(&lwb->lwb_waiters); list_destroy(&lwb->lwb_itxs); @@ -4381,7 +4456,7 @@ zil_close(zilog_t *zilog) if (lwb != NULL) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); ASSERT3S(lwb->lwb_state, ==, LWB_STATE_NEW); - zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + ASSERT0P(lwb->lwb_buf); 
zil_free_lwb(zilog, lwb); } mutex_exit(&zilog->zl_lock); @@ -4472,16 +4547,16 @@ zil_suspend(const char *osname, void **cookiep) cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); mutex_exit(&zilog->zl_lock); - if (cookiep == NULL) + if (zilog->zl_restart_txg > 0) { + /* ZIL crashed while we were waiting. */ + zil_resume(os); + error = SET_ERROR(EBUSY); + } else if (cookiep == NULL) zil_resume(os); else *cookiep = os; - if (zilog->zl_restart_txg > 0) - /* ZIL crashed while we were waiting. */ - return (SET_ERROR(EBUSY)); - - return (0); + return (error); } /* diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 3f0ddb63249d..4cf8912d4269 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -4434,12 +4434,15 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) */ int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, - uint64_t size, boolean_t *slog) + uint64_t min_size, uint64_t max_size, boolean_t *slog, + boolean_t allow_larger) { int error; zio_alloc_list_t io_alloc_list; + uint64_t alloc_size = 0; ASSERT(txg > spa_syncing_txg(spa)); + ASSERT3U(min_size, <=, max_size); metaslab_trace_init(&io_alloc_list); @@ -4448,7 +4451,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, * Fill in the obvious ones before calling into metaslab_alloc(). 
*/ BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG); - BP_SET_PSIZE(new_bp, size); + BP_SET_PSIZE(new_bp, max_size); BP_SET_LEVEL(new_bp, 0); /* @@ -4463,43 +4466,51 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, ZIOSTAT_BUMP(ziostat_total_allocations); /* Try log class (dedicated slog devices) first */ - error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, - txg, NULL, flags, &io_alloc_list, allocator, NULL); + error = metaslab_alloc_range(spa, spa_log_class(spa), min_size, + max_size, new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, + NULL, &alloc_size); *slog = (error == 0); /* Try special_embedded_log class (reserved on special vdevs) */ if (error != 0) { - error = metaslab_alloc(spa, spa_special_embedded_log_class(spa), - size, new_bp, 1, txg, NULL, flags, &io_alloc_list, - allocator, NULL); + error = metaslab_alloc_range(spa, + spa_special_embedded_log_class(spa), min_size, max_size, + new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, + NULL, &alloc_size); } /* Try special class (general special vdev allocation) */ if (error != 0) { - error = metaslab_alloc(spa, spa_special_class(spa), size, - new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, - NULL); + error = metaslab_alloc_range(spa, spa_special_class(spa), + min_size, max_size, new_bp, 1, txg, NULL, flags, + &io_alloc_list, allocator, NULL, &alloc_size); } /* Try embedded_log class (reserved on normal vdevs) */ if (error != 0) { - error = metaslab_alloc(spa, spa_embedded_log_class(spa), size, - new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, - NULL); + error = metaslab_alloc_range(spa, spa_embedded_log_class(spa), + min_size, max_size, new_bp, 1, txg, NULL, flags, + &io_alloc_list, allocator, NULL, &alloc_size); } /* Finally fall back to normal class */ if (error != 0) { ZIOSTAT_BUMP(ziostat_alloc_class_fallbacks); - error = metaslab_alloc(spa, spa_normal_class(spa), size, - new_bp, 1, txg, NULL, flags, &io_alloc_list, allocator, - NULL); + error = 
metaslab_alloc_range(spa, spa_normal_class(spa), + min_size, max_size, new_bp, 1, txg, NULL, flags, + &io_alloc_list, allocator, NULL, &alloc_size); } metaslab_trace_fini(&io_alloc_list); if (error == 0) { - BP_SET_LSIZE(new_bp, size); - BP_SET_PSIZE(new_bp, size); + if (!allow_larger) + alloc_size = MIN(alloc_size, max_size); + else if (max_size <= SPA_OLD_MAXBLOCKSIZE) + alloc_size = MIN(alloc_size, SPA_OLD_MAXBLOCKSIZE); + alloc_size = P2ALIGN_TYPED(alloc_size, ZIL_MIN_BLKSZ, uint64_t); + + BP_SET_LSIZE(new_bp, alloc_size); + BP_SET_PSIZE(new_bp, alloc_size); BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); BP_SET_CHECKSUM(new_bp, spa_version(spa) >= SPA_VERSION_SLIM_ZIL @@ -4527,8 +4538,8 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, } } else { zfs_dbgmsg("%s: zil block allocation failure: " - "size %llu, error %d", spa_name(spa), (u_longlong_t)size, - error); + "min_size %llu, max_size %llu, error %d", spa_name(spa), + (u_longlong_t)min_size, (u_longlong_t)max_size, error); } return (error); diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 29f51e230a37..2fd3e1c37045 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -38,25 +38,36 @@ * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2012, 2019 by Delphix. All rights reserved. - * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2024, 2025, Klara, Inc. */ /* * Note on locking of zvol state structures. * - * These structures are used to maintain internal state used to emulate block - * devices on top of zvols. In particular, management of device minor number - * operations - create, remove, rename, and set_snapdev - involves access to - * these structures. The zvol_state_lock is primarily used to protect the - * zvol_state_list. 
The zv->zv_state_lock is used to protect the contents - * of the zvol_state_t structures, as well as to make sure that when the - * time comes to remove the structure from the list, it is not in use, and - * therefore, it can be taken off zvol_state_list and freed. + * zvol_state_t represents the connection between a single dataset + * (DMU_OST_ZVOL) and the device "minor" (some OS-specific representation of a + * "disk" or "device" or "volume", eg, a /dev/zdXX node, a GEOM object, etc). * - * The zv_suspend_lock was introduced to allow for suspending I/O to a zvol, - * e.g. for the duration of receive and rollback operations. This lock can be - * held for significant periods of time. Given that it is undesirable to hold - * mutexes for long periods of time, the following lock ordering applies: + * The global zvol_state_lock is used to protect access to zvol_state_list and + * zvol_htable, which are the primary way to obtain a zvol_state_t from a name. + * It should not be used for anything not name-related, and you should avoid + * sleeping or waiting while it's held. See zvol_find_by_name(), zvol_insert(), + * zvol_remove(). + * + * The zv_state_lock is used to protect the contents of the associated + * zvol_state_t. Most of the zvol_state_t is dedicated to control and + * configuration; almost none of it is needed for data operations (that is, + * read, write, flush) so this lock is rarely taken during general IO. It + * should be released quickly; you should avoid sleeping or waiting while it's + * held. + * + * zv_suspend_lock is used to suspend IO/data operations to a zvol. The read + * half should be held for the duration of an IO operation. The write half + * should be taken when something needs to wait for IO to complete and then + * block further IO, eg for the duration of receive and rollback operations. + * This lock can be + * held for long periods of time. + * + * Thus, the following lock ordering applies. 
* - take zvol_state_lock if necessary, to protect zvol_state_list * - take zv_suspend_lock if necessary, by the code path in question * - take zv_state_lock to protect zvol_state_t @@ -67,9 +78,8 @@ * these operations are serialized per pool. Consequently, we can be certain * that for a given zvol, there is only one operation at a time in progress. * That is why one can be sure that first, zvol_state_t for a given zvol is - * allocated and placed on zvol_state_list, and then other minor operations - * for this zvol are going to proceed in the order of issue. - * + * allocated and placed on zvol_state_list, and then other minor operations for + * this zvol are going to proceed in the order of issue. */ #include <sys/dataset_kstats.h> @@ -1570,184 +1580,156 @@ zvol_create_minors_impl(zvol_task_t *task) } /* - * Remove minors for specified dataset including children and snapshots. - */ - -/* - * Remove the minor for a given zvol. This will do it all: - * - flag the zvol for removal, so new requests are rejected - * - wait until outstanding requests are completed - * - remove it from lists - * - free it - * It's also usable as a taskq task, and smells nice too. + * Remove minors for specified dataset and, optionally, its children and + * snapshots. 
*/ static void -zvol_remove_minor_task(void *arg) -{ - zvol_state_t *zv = (zvol_state_t *)arg; - - ASSERT(!RW_LOCK_HELD(&zvol_state_lock)); - ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); - - mutex_enter(&zv->zv_state_lock); - while (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { - zv->zv_flags |= ZVOL_REMOVING; - cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock); - } - mutex_exit(&zv->zv_state_lock); - - rw_enter(&zvol_state_lock, RW_WRITER); - mutex_enter(&zv->zv_state_lock); - - zvol_remove(zv); - zvol_os_clear_private(zv); - - mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); - - zvol_os_free(zv); -} - -static void -zvol_free_task(void *arg) -{ - zvol_os_free(arg); -} - -static void zvol_remove_minors_impl(zvol_task_t *task) { zvol_state_t *zv, *zv_next; const char *name = task ? task->zt_name1 : NULL; int namelen = ((name) ? strlen(name) : 0); - taskqid_t t; - list_t delay_list, free_list; + boolean_t children = task ? !!task->zt_value : B_TRUE; if (zvol_inhibit_dev) return; - list_create(&delay_list, sizeof (zvol_state_t), - offsetof(zvol_state_t, zv_next)); - list_create(&free_list, sizeof (zvol_state_t), - offsetof(zvol_state_t, zv_next)); + /* + * We collect up zvols that we want to remove on a separate list, so + * that we don't have to hold zvol_state_lock for the whole time. + * + * We can't remove them from the global lists until we're completely + * done with them, because that would make them appear to ZFS-side ops + * that they don't exist, and the name might be reused, which can't be + * good. + */ + list_t remove_list; + list_create(&remove_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_remove_node)); - rw_enter(&zvol_state_lock, RW_WRITER); + rw_enter(&zvol_state_lock, RW_READER); for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); mutex_enter(&zv->zv_state_lock); + if (zv->zv_flags & ZVOL_REMOVING) { + /* Another thread is handling shutdown, skip it. 
*/ + mutex_exit(&zv->zv_state_lock); + continue; + } + + /* + * This zvol should be removed if: + * - no name was offered (ie removing all at shutdown); or + * - name matches exactly; or + * - we were asked to remove children, and + * - the start of the name matches, and + * - there is a '/' immediately after the matched name; or + * - there is a '@' immediately after the matched name */ if (name == NULL || strcmp(zv->zv_name, name) == 0 || - (strncmp(zv->zv_name, name, namelen) == 0 && + (children && strncmp(zv->zv_name, name, namelen) == 0 && (zv->zv_name[namelen] == '/' || zv->zv_name[namelen] == '@'))) { - /* - * By holding zv_state_lock here, we guarantee that no - * one is currently using this zv - */ /* - * If in use, try to throw everyone off and try again - * later. + * Matched, so mark it for removal. We want to take the + * write half of the suspend lock to make sure that + * the zvol is not suspended, and give any data ops a + * chance to finish. */ - if (zv->zv_open_count > 0 || - atomic_read(&zv->zv_suspend_ref)) { - zv->zv_flags |= ZVOL_REMOVING; - t = taskq_dispatch( - zv->zv_objset->os_spa->spa_zvol_taskq, - zvol_remove_minor_task, zv, TQ_SLEEP); - if (t == TASKQID_INVALID) { - /* - * Couldn't create the task, so we'll - * do it in place once the loop is - * finished. - */ - list_insert_head(&delay_list, zv); - } + mutex_exit(&zv->zv_state_lock); + rw_enter(&zv->zv_suspend_lock, RW_WRITER); + mutex_enter(&zv->zv_state_lock); + + if (zv->zv_flags & ZVOL_REMOVING) { + /* Another thread has taken it, let them. */ mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); continue; } - zvol_remove(zv); - /* - * Cleared while holding zvol_state_lock as a writer - * which will prevent zvol_open() from opening it. + * Mark it and unlock. New entries will see the flag + * and return ENXIO. 
*/ - zvol_os_clear_private(zv); - - /* Drop zv_state_lock before zvol_free() */ + zv->zv_flags |= ZVOL_REMOVING; mutex_exit(&zv->zv_state_lock); + rw_exit(&zv->zv_suspend_lock); - /* Try parallel zv_free, if failed do it in place */ - t = taskq_dispatch(system_taskq, zvol_free_task, zv, - TQ_SLEEP); - if (t == TASKQID_INVALID) - list_insert_head(&free_list, zv); - } else { + /* Put it on the list for the next stage. */ + list_insert_head(&remove_list, zv); + } else mutex_exit(&zv->zv_state_lock); - } } - rw_exit(&zvol_state_lock); - /* Wait for zvols that we couldn't create a remove task for */ - while ((zv = list_remove_head(&delay_list)) != NULL) - zvol_remove_minor_task(zv); - - /* Free any that we couldn't free in parallel earlier */ - while ((zv = list_remove_head(&free_list)) != NULL) - zvol_os_free(zv); -} - -/* Remove minor for this specific volume only */ -static int -zvol_remove_minor_impl(const char *name) -{ - zvol_state_t *zv = NULL, *zv_next; - - if (zvol_inhibit_dev) - return (0); + rw_exit(&zvol_state_lock); - rw_enter(&zvol_state_lock, RW_WRITER); + /* Didn't match any, nothing to do! */ + if (list_is_empty(&remove_list)) { + if (task) + task->zt_error = SET_ERROR(ENOENT); + return; + } - for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { - zv_next = list_next(&zvol_state_list, zv); + /* Actually shut them all down. */ + for (zv = list_head(&remove_list); zv != NULL; zv = zv_next) { + zv_next = list_next(&remove_list, zv); mutex_enter(&zv->zv_state_lock); - if (strcmp(zv->zv_name, name) == 0) - /* Found, leave the the loop with zv_lock held */ - break; - mutex_exit(&zv->zv_state_lock); - } - - if (zv == NULL) { - rw_exit(&zvol_state_lock); - return (SET_ERROR(ENOENT)); - } - ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + /* + * Still open or suspended, just wait. 
This can happen if, for + * example, we managed to acquire zv_state_lock in the moments + * where zvol_open() or zvol_release() are trading locks to + * call zvol_first_open() or zvol_last_close(). + */ + while (zv->zv_open_count > 0 || + atomic_read(&zv->zv_suspend_ref)) + cv_wait(&zv->zv_removing_cv, &zv->zv_state_lock); - if (zv->zv_open_count > 0 || atomic_read(&zv->zv_suspend_ref)) { /* - * In use, so try to throw everyone off, then wait - * until finished. + * No users, shut down the OS side. This may not remove the + * minor from view immediately, depending on the kernel + * specifics, but it will ensure that it is unusable and that + * this zvol_state_t can never again be reached from an OS-side + * operation. */ - zv->zv_flags |= ZVOL_REMOVING; + zvol_os_remove_minor(zv); mutex_exit(&zv->zv_state_lock); + + /* Remove it from the name lookup lists */ + rw_enter(&zvol_state_lock, RW_WRITER); + zvol_remove(zv); rw_exit(&zvol_state_lock); - zvol_remove_minor_task(zv); - return (0); } - zvol_remove(zv); - zvol_os_clear_private(zv); + /* + * Our own references on remove_list is the last one, free them and + * we're done. 
+ */ + while ((zv = list_remove_head(&remove_list)) != NULL) + zvol_os_free(zv); - mutex_exit(&zv->zv_state_lock); - rw_exit(&zvol_state_lock); + list_destroy(&remove_list); +} - zvol_os_free(zv); +/* Remove minor for this specific volume only */ +static int +zvol_remove_minor_impl(const char *name) +{ + if (zvol_inhibit_dev) + return (0); - return (0); + zvol_task_t task; + memset(&task, 0, sizeof (zvol_task_t)); + strlcpy(task.zt_name1, name, sizeof (task.zt_name1)); + task.zt_value = B_FALSE; + + zvol_remove_minors_impl(&task); + + return (task.zt_error); } /* @@ -2067,6 +2049,7 @@ zvol_remove_minors(spa_t *spa, const char *name, boolean_t async) task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP); task->zt_op = ZVOL_ASYNC_REMOVE_MINORS; strlcpy(task->zt_name1, name, sizeof (task->zt_name1)); + task->zt_value = B_TRUE; id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP); if ((async == B_FALSE) && (id != TASKQID_INVALID)) taskq_wait_id(spa->spa_zvol_taskq, id); @@ -2188,14 +2171,6 @@ zvol_fini_impl(void) zvol_remove_minors_impl(NULL); - /* - * The call to "zvol_remove_minors_impl" may dispatch entries to - * the system_taskq, but it doesn't wait for those entries to - * complete before it returns. Thus, we must wait for all of the - * removals to finish, before we can continue. 
- */ - taskq_wait_outstanding(system_taskq, 0); - kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head)); list_destroy(&zvol_state_list); rw_destroy(&zvol_state_lock); diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c index 391216d6e263..3db196953f74 100644 --- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c +++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c @@ -876,9 +876,9 @@ static void __init zstd_mempool_init(void) { zstd_mempool_cctx = - kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); + vmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); zstd_mempool_dctx = - kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); + vmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); for (int i = 0; i < ZSTD_POOL_MAX; i++) { mutex_init(&zstd_mempool_cctx[i].barrier, NULL, @@ -924,8 +924,8 @@ zstd_mempool_deinit(void) release_pool(&zstd_mempool_dctx[i]); } - kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); - kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); + vmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); + vmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); zstd_mempool_dctx = NULL; zstd_mempool_cctx = NULL; } diff --git a/sys/contrib/openzfs/scripts/spdxcheck.pl b/sys/contrib/openzfs/scripts/spdxcheck.pl index 88f5a235d70c..cdab5368f19c 100755 --- a/sys/contrib/openzfs/scripts/spdxcheck.pl +++ b/sys/contrib/openzfs/scripts/spdxcheck.pl @@ -190,6 +190,7 @@ my @path_license_tags = ( ['BSD-2-Clause OR GPL-2.0-only', 'CDDL-1.0'], 'module/icp' => ['Apache-2.0', 'CDDL-1.0'], + 'contrib/icp' => ['Apache-2.0', 'CDDL-1.0'], # Python bindings are always Apache-2.0 'contrib/pyzfs' => ['Apache-2.0'], diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 131845f5ed40..2da46458289a 100644 --- 
a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -1093,7 +1093,7 @@ tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', tags = ['functional', 'zvol', 'zvol_misc'] [tests/functional/zvol/zvol_stress] -tests = ['zvol_stress'] +tests = ['zvol_stress', 'zvol_stress_destroy'] tags = ['functional', 'zvol', 'zvol_stress'] [tests/functional/zvol/zvol_swap] diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c index e08003f80464..cbebd33e0bf6 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/crypto_test.c @@ -529,6 +529,8 @@ static const char *aes_gcm_impl[][2] = { { "aesni", "pclmulqdq" }, { "x86_64", "avx" }, { "aesni", "avx" }, + { "x86_64", "avx2" }, + { "aesni", "avx2" }, }; /* signature of function to call after setting implementation params */ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index b8b8bbe45a42..41e7b45ef4ec 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -2244,6 +2244,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/zvol/zvol_stress/cleanup.ksh \ functional/zvol/zvol_stress/setup.ksh \ functional/zvol/zvol_stress/zvol_stress.ksh \ + functional/zvol/zvol_stress/zvol_stress_destroy.ksh \ functional/zvol/zvol_swap/cleanup.ksh \ functional/zvol/zvol_swap/setup.ksh \ functional/zvol/zvol_swap/zvol_swap_001_pos.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress_destroy.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress_destroy.ksh new file mode 100755 index 000000000000..669b59fac01f --- /dev/null +++ 
b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress_destroy.ksh @@ -0,0 +1,66 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2025, Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +typeset -i nzvols=1000 +typeset -i parallel=$(( $(get_num_cpus) * 2 )) + +function cleanup { + for zvol in $(zfs list -Ho name -t vol) ; do + log_must_busy zfs destroy $zvol + done +} + +log_onexit cleanup + +log_assert "stress test concurrent zvol create/destroy" + +function destroy_zvols_until { + typeset cond=$1 + while true ; do + IFS='' zfs list -Ho name -t vol | read -r -d '' zvols + if [[ -n $zvols ]] ; then + echo $zvols | xargs -n 1 -P $parallel zfs destroy + fi + if ! $cond ; then + break + fi + done +} + +( seq $nzvols | \ + xargs -P $parallel -I % zfs create -s -V 1G $TESTPOOL/testvol% ) & +cpid=$! 
+sleep 1 + +destroy_zvols_until "kill -0 $cpid" +destroy_zvols_until "false" + +log_pass "stress test done" diff --git a/sys/dev/fdt/fdt_common.c b/sys/dev/fdt/fdt_common.c index 1fea4c6f1392..f43551c6310e 100644 --- a/sys/dev/fdt/fdt_common.c +++ b/sys/dev/fdt/fdt_common.c @@ -62,8 +62,6 @@ SYSCTL_NODE(_hw, OID_AUTO, fdt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Flattened Device Tree"); -struct fdt_ic_list fdt_ic_list_head = SLIST_HEAD_INITIALIZER(fdt_ic_list_head); - static int fdt_get_range_by_busaddr(phandle_t node, u_long addr, u_long *base, u_long *size) diff --git a/sys/dev/fdt/fdt_common.h b/sys/dev/fdt/fdt_common.h index ece54290a6ad..f597233f9771 100644 --- a/sys/dev/fdt/fdt_common.h +++ b/sys/dev/fdt/fdt_common.h @@ -59,13 +59,6 @@ struct fdt_fixup_entry { extern struct fdt_fixup_entry fdt_fixup_table[]; #endif -extern SLIST_HEAD(fdt_ic_list, fdt_ic) fdt_ic_list_head; -struct fdt_ic { - SLIST_ENTRY(fdt_ic) fdt_ics; - ihandle_t iph; - device_t dev; -}; - #if defined(FDT_DTB_STATIC) extern u_char fdt_static_dtb; #endif diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index 170f23615416..0d2455cab399 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -37,6 +37,7 @@ #include <dev/gpio/gpiobusvar.h> #include <dev/gpio/acpi_gpiobusvar.h> #include <dev/gpio/gpiobus_internal.h> +#include <sys/sbuf.h> #include "gpiobus_if.h" @@ -52,12 +53,11 @@ struct acpi_gpiobus_ctx { struct acpi_gpiobus_ivar { - struct gpiobus_ivar gpiobus; /* Must come first */ - ACPI_HANDLE dev_handle; /* ACPI handle for bus */ - uint32_t flags; + struct gpiobus_ivar gpiobus; + ACPI_HANDLE handle; }; -static uint32_t +uint32_t acpi_gpiobus_convflags(ACPI_RESOURCE_GPIO *gpio_res) { uint32_t flags = 0; @@ -150,70 +150,24 @@ acpi_gpiobus_enumerate_res(ACPI_RESOURCE *res, void *context) return (AE_OK); } -static struct acpi_gpiobus_ivar * -acpi_gpiobus_setup_devinfo(device_t bus, device_t child, - ACPI_RESOURCE_GPIO *gpio_res) -{ - struct 
acpi_gpiobus_ivar *devi; - - devi = malloc(sizeof(*devi), M_DEVBUF, M_NOWAIT | M_ZERO); - if (devi == NULL) - return (NULL); - resource_list_init(&devi->gpiobus.rl); - - devi->flags = acpi_gpiobus_convflags(gpio_res); - if (acpi_quirks & ACPI_Q_AEI_NOPULL) - devi->flags &= ~GPIO_PIN_PULLUP; - - devi->gpiobus.npins = 1; - if (gpiobus_alloc_ivars(&devi->gpiobus) != 0) { - free(devi, M_DEVBUF); - return (NULL); - } - - for (int i = 0; i < devi->gpiobus.npins; i++) - devi->gpiobus.pins[i] = gpio_res->PinTable[i]; - - return (devi); -} - static ACPI_STATUS acpi_gpiobus_enumerate_aei(ACPI_RESOURCE *res, void *context) { ACPI_RESOURCE_GPIO *gpio_res = &res->Data.Gpio; - struct acpi_gpiobus_ctx *ctx = context; - device_t bus = ctx->sc->sc_busdev; - device_t child; - struct acpi_gpiobus_ivar *devi; + uint32_t *npins = context, *pins = npins + 1; - /* Check that we have a GpioInt object. */ + /* + * Check that we have a GpioInt object. + * Note that according to the spec this + * should always be the case. + */ if (res->Type != ACPI_RESOURCE_TYPE_GPIO) return (AE_OK); if (gpio_res->ConnectionType != ACPI_RESOURCE_GPIO_TYPE_INT) return (AE_OK); - /* Add a child. */ - child = device_add_child_ordered(bus, 0, "gpio_aei", DEVICE_UNIT_ANY); - if (child == NULL) - return (AE_OK); - devi = acpi_gpiobus_setup_devinfo(bus, child, gpio_res); - if (devi == NULL) { - device_delete_child(bus, child); - return (AE_OK); - } - device_set_ivars(child, devi); - - for (int i = 0; i < devi->gpiobus.npins; i++) { - if (GPIOBUS_PIN_SETFLAGS(bus, child, 0, devi->flags & - ~GPIO_INTR_MASK)) { - device_delete_child(bus, child); - return (AE_OK); - } - } - - /* Pass ACPI information to children. 
*/ - devi->dev_handle = ctx->dev_handle; - + for (int i = 0; i < gpio_res->PinTableLength; i++) + pins[(*npins)++] = gpio_res->PinTable[i]; return (AE_OK); } @@ -296,6 +250,63 @@ err: return (AE_BAD_PARAMETER); } +static void +acpi_gpiobus_attach_aei(struct acpi_gpiobus_softc *sc, ACPI_HANDLE handle) +{ + struct acpi_gpiobus_ivar *devi; + ACPI_HANDLE aei_handle; + device_t child; + uint32_t *pins; + ACPI_STATUS status; + int err; + + status = AcpiGetHandle(handle, "_AEI", &aei_handle); + if (ACPI_FAILURE(status)) + return; + + /* pins[0] specifies the length of the array. */ + pins = mallocarray(sc->super_sc.sc_npins + 1, + sizeof(uint32_t), M_DEVBUF, M_WAITOK); + pins[0] = 0; + + status = AcpiWalkResources(handle, "_AEI", + acpi_gpiobus_enumerate_aei, pins); + if (ACPI_FAILURE(status)) { + device_printf(sc->super_sc.sc_busdev, + "Failed to enumerate AEI resources\n"); + free(pins, M_DEVBUF); + return; + } + + child = BUS_ADD_CHILD(sc->super_sc.sc_busdev, 0, "gpio_aei", + DEVICE_UNIT_ANY); + if (child == NULL) { + device_printf(sc->super_sc.sc_busdev, + "Failed to add gpio_aei child\n"); + free(pins, M_DEVBUF); + return; + } + + devi = device_get_ivars(child); + devi->gpiobus.npins = pins[0]; + devi->handle = aei_handle; + + err = gpiobus_alloc_ivars(&devi->gpiobus); + if (err != 0) { + device_printf(sc->super_sc.sc_busdev, + "Failed to allocate gpio_aei ivars\n"); + device_delete_child(sc->super_sc.sc_busdev, child); + free(pins, M_DEVBUF); + return; + } + + for (int i = 0; i < pins[0]; i++) + devi->gpiobus.pins[i] = pins[i + 1]; + free(pins, M_DEVBUF); + + bus_attach_children(sc->super_sc.sc_busdev); +} + static int acpi_gpiobus_probe(device_t dev) { @@ -353,13 +364,8 @@ acpi_gpiobus_attach(device_t dev) if (ACPI_FAILURE(status)) device_printf(dev, "Failed to enumerate GPIO resources\n"); - /* Look for AEI children */ - status = AcpiWalkResources(handle, "_AEI", acpi_gpiobus_enumerate_aei, - &ctx); - - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) - 
device_printf(dev, "Failed to enumerate AEI resources\n"); - + /* Look for AEI child */ + acpi_gpiobus_attach_aei(sc, handle); return (0); } @@ -390,10 +396,7 @@ acpi_gpiobus_read_ivar(device_t dev, device_t child, int which, switch (which) { case ACPI_GPIOBUS_IVAR_HANDLE: - *result = (uintptr_t)devi->dev_handle; - break; - case ACPI_GPIOBUS_IVAR_FLAGS: - *result = (uintptr_t)devi->flags; + *result = (uintptr_t)devi->handle; break; default: return (gpiobus_read_ivar(dev, child, which, result)); @@ -402,6 +405,28 @@ acpi_gpiobus_read_ivar(device_t dev, device_t child, int which, return (0); } +static device_t +acpi_gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) +{ + return (gpiobus_add_child_common(dev, order, name, unit, + sizeof(struct acpi_gpiobus_ivar))); +} + +static int +acpi_gpiobus_child_location(device_t bus, device_t child, struct sbuf *sb) +{ + struct acpi_gpiobus_ivar *devi; + int err; + + err = gpiobus_child_location(bus, child, sb); + if (err != 0) + return (err); + + devi = device_get_ivars(child); + sbuf_printf(sb, " handle=%s", acpi_name(devi->handle)); + return (0); +} + static device_method_t acpi_gpiobus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_gpiobus_probe), @@ -410,6 +435,8 @@ static device_method_t acpi_gpiobus_methods[] = { /* Bus interface */ DEVMETHOD(bus_read_ivar, acpi_gpiobus_read_ivar), + DEVMETHOD(bus_add_child, acpi_gpiobus_add_child), + DEVMETHOD(bus_child_location, acpi_gpiobus_child_location), DEVMETHOD_END }; diff --git a/sys/dev/gpio/acpi_gpiobusvar.h b/sys/dev/gpio/acpi_gpiobusvar.h index f8d502eab9d1..288e8bd0f2af 100644 --- a/sys/dev/gpio/acpi_gpiobusvar.h +++ b/sys/dev/gpio/acpi_gpiobusvar.h @@ -33,16 +33,16 @@ #include <contrib/dev/acpica/include/acpi.h> enum acpi_gpiobus_ivars { - ACPI_GPIOBUS_IVAR_HANDLE = 10600, - ACPI_GPIOBUS_IVAR_FLAGS, + ACPI_GPIOBUS_IVAR_HANDLE = 10600 }; #define ACPI_GPIOBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(acpi_gpiobus, var, ACPI_GPIOBUS, 
ivar, type) ACPI_GPIOBUS_ACCESSOR(handle, HANDLE, ACPI_HANDLE) -ACPI_GPIOBUS_ACCESSOR(flags, FLAGS, uint32_t) #undef ACPI_GPIOBUS_ACCESSOR +uint32_t acpi_gpiobus_convflags(ACPI_RESOURCE_GPIO *); + #endif /* __ACPI_GPIOBUS_H__ */ diff --git a/sys/dev/gpio/gpioaei.c b/sys/dev/gpio/gpioaei.c index ecae8ccaf2fa..7b97277aaf61 100644 --- a/sys/dev/gpio/gpioaei.c +++ b/sys/dev/gpio/gpioaei.c @@ -45,13 +45,21 @@ enum gpio_aei_type { ACPI_AEI_TYPE_EVT }; -struct gpio_aei_softc { - ACPI_HANDLE handle; - enum gpio_aei_type type; - int pin; +struct gpio_aei_ctx { + SLIST_ENTRY(gpio_aei_ctx) next; struct resource * intr_res; - int intr_rid; void * intr_cookie; + ACPI_HANDLE handle; + gpio_pin_t gpio; + uint32_t pin; + int intr_rid; + enum gpio_aei_type type; +}; + +struct gpio_aei_softc { + SLIST_HEAD(, gpio_aei_ctx) aei_ctx; + ACPI_HANDLE dev_handle; + device_t dev; }; static int @@ -65,69 +73,157 @@ gpio_aei_probe(device_t dev) static void gpio_aei_intr(void * arg) { - struct gpio_aei_softc * sc = arg; + struct gpio_aei_ctx * ctx = arg; /* Ask ACPI to run the appropriate _EVT, _Exx or _Lxx method. */ - if (sc->type == ACPI_AEI_TYPE_EVT) - acpi_SetInteger(sc->handle, NULL, sc->pin); + if (ctx->type == ACPI_AEI_TYPE_EVT) + acpi_SetInteger(ctx->handle, NULL, ctx->pin); else - AcpiEvaluateObject(sc->handle, NULL, NULL, NULL); + AcpiEvaluateObject(ctx->handle, NULL, NULL, NULL); +} + +static ACPI_STATUS +gpio_aei_enumerate(ACPI_RESOURCE * res, void * context) +{ + ACPI_RESOURCE_GPIO * gpio_res = &res->Data.Gpio; + struct gpio_aei_softc * sc = context; + uint32_t flags, maxpin; + device_t busdev; + int err; + + /* + * Check that we have a GpioInt object. + * Note that according to the spec this + * should always be the case. 
+ */ + if (res->Type != ACPI_RESOURCE_TYPE_GPIO) + return (AE_OK); + if (gpio_res->ConnectionType != ACPI_RESOURCE_GPIO_TYPE_INT) + return (AE_OK); + + flags = acpi_gpiobus_convflags(gpio_res); + if (acpi_quirks & ACPI_Q_AEI_NOPULL) + flags &= ~GPIO_PIN_PULLUP; + + err = GPIO_PIN_MAX(acpi_get_device(sc->dev_handle), &maxpin); + if (err != 0) + return (AE_ERROR); + + busdev = GPIO_GET_BUS(acpi_get_device(sc->dev_handle)); + for (int i = 0; i < gpio_res->PinTableLength; i++) { + struct gpio_aei_ctx * ctx; + uint32_t pin = gpio_res->PinTable[i]; + + if (__predict_false(pin > maxpin)) { + device_printf(sc->dev, + "Invalid pin 0x%x, max: 0x%x (bad ACPI tables?)\n", + pin, maxpin); + continue; + } + + ctx = malloc(sizeof(struct gpio_aei_ctx), M_DEVBUF, M_WAITOK); + ctx->type = ACPI_AEI_TYPE_UNKNOWN; + if (pin <= 255) { + char objname[5]; /* "_EXX" or "_LXX" */ + sprintf(objname, "_%c%02X", + (flags & GPIO_INTR_EDGE_MASK) ? 'E' : 'L', pin); + if (ACPI_SUCCESS(AcpiGetHandle(sc->dev_handle, objname, + &ctx->handle))) + ctx->type = ACPI_AEI_TYPE_ELX; + } + + if (ctx->type == ACPI_AEI_TYPE_UNKNOWN) { + if (ACPI_SUCCESS(AcpiGetHandle(sc->dev_handle, "_EVT", + &ctx->handle))) + ctx->type = ACPI_AEI_TYPE_EVT; + else { + device_printf(sc->dev, + "AEI Device type is unknown for pin 0x%x\n", + pin); + + free(ctx, M_DEVBUF); + continue; + } + } + + err = gpio_pin_get_by_bus_pinnum(busdev, pin, &ctx->gpio); + if (err != 0) { + device_printf(sc->dev, "Cannot acquire pin 0x%x\n", + pin); + + free(ctx, M_DEVBUF); + continue; + } + + err = gpio_pin_setflags(ctx->gpio, flags & ~GPIO_INTR_MASK); + if (err != 0) { + device_printf(sc->dev, + "Cannot set pin flags for pin 0x%x\n", pin); + + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + continue; + } + + ctx->intr_rid = 0; + ctx->intr_res = gpio_alloc_intr_resource(sc->dev, + &ctx->intr_rid, RF_ACTIVE, ctx->gpio, + flags & GPIO_INTR_MASK); + if (ctx->intr_res == NULL) { + device_printf(sc->dev, + "Cannot allocate an IRQ for pin 
0x%x\n", pin); + + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + continue; + } + + err = bus_setup_intr(sc->dev, ctx->intr_res, INTR_TYPE_MISC | + INTR_MPSAFE | INTR_EXCL | INTR_SLEEPABLE, NULL, + gpio_aei_intr, ctx, &ctx->intr_cookie); + if (err != 0) { + device_printf(sc->dev, + "Cannot set up an IRQ for pin 0x%x\n", pin); + + bus_release_resource(sc->dev, ctx->intr_res); + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + continue; + } + + ctx->pin = pin; + SLIST_INSERT_HEAD(&sc->aei_ctx, ctx, next); + } + + return (AE_OK); } static int gpio_aei_attach(device_t dev) { struct gpio_aei_softc * sc = device_get_softc(dev); - gpio_pin_t pin; - uint32_t flags; ACPI_HANDLE handle; - int err; + ACPI_STATUS status; /* This is us. */ device_set_desc(dev, "ACPI Event Information Device"); - /* Store parameters needed by gpio_aei_intr. */ handle = acpi_gpiobus_get_handle(dev); - if (gpio_pin_get_by_child_index(dev, 0, &pin) != 0) { - device_printf(dev, "Unable to get the input pin\n"); + status = AcpiGetParent(handle, &sc->dev_handle); + if (ACPI_FAILURE(status)) { + device_printf(dev, "Cannot get parent of %s\n", + acpi_name(handle)); return (ENXIO); } - sc->type = ACPI_AEI_TYPE_UNKNOWN; - sc->pin = pin->pin; - - flags = acpi_gpiobus_get_flags(dev); - if (pin->pin <= 255) { - char objname[5]; /* "_EXX" or "_LXX" */ - sprintf(objname, "_%c%02X", - (flags & GPIO_INTR_EDGE_MASK) ? 'E' : 'L', pin->pin); - if (ACPI_SUCCESS(AcpiGetHandle(handle, objname, &sc->handle))) - sc->type = ACPI_AEI_TYPE_ELX; - } - if (sc->type == ACPI_AEI_TYPE_UNKNOWN) { - if (ACPI_SUCCESS(AcpiGetHandle(handle, "_EVT", &sc->handle))) - sc->type = ACPI_AEI_TYPE_EVT; - } - - if (sc->type == ACPI_AEI_TYPE_UNKNOWN) { - device_printf(dev, "ACPI Event Information Device type is unknown"); - return (ENOTSUP); - } + SLIST_INIT(&sc->aei_ctx); + sc->dev = dev; - /* Set up the interrupt. 
*/ - if ((sc->intr_res = gpio_alloc_intr_resource(dev, &sc->intr_rid, - RF_ACTIVE, pin, flags & GPIO_INTR_MASK)) == NULL) { - device_printf(dev, "Cannot allocate an IRQ\n"); - return (ENOTSUP); - } - err = bus_setup_intr(dev, sc->intr_res, INTR_TYPE_MISC | INTR_MPSAFE | - INTR_EXCL | INTR_SLEEPABLE, NULL, gpio_aei_intr, sc, - &sc->intr_cookie); - if (err != 0) { - device_printf(dev, "Cannot set up IRQ\n"); - bus_release_resource(dev, SYS_RES_IRQ, sc->intr_rid, - sc->intr_res); - return (err); + status = AcpiWalkResources(sc->dev_handle, "_AEI", + gpio_aei_enumerate, sc); + if (ACPI_FAILURE(status)) { + device_printf(dev, "Failed to enumerate AEI resources\n"); + return (ENXIO); } return (0); @@ -137,9 +233,15 @@ static int gpio_aei_detach(device_t dev) { struct gpio_aei_softc * sc = device_get_softc(dev); + struct gpio_aei_ctx * ctx, * tctx; + + SLIST_FOREACH_SAFE(ctx, &sc->aei_ctx, next, tctx) { + bus_teardown_intr(dev, ctx->intr_res, ctx->intr_cookie); + bus_release_resource(dev, ctx->intr_res); + gpio_pin_release(ctx->gpio); + free(ctx, M_DEVBUF); + } - bus_teardown_intr(dev, sc->intr_res, sc->intr_cookie); - bus_release_resource(dev, SYS_RES_IRQ, sc->intr_rid, sc->intr_res); return (0); } diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index c25c41f43042..5f1f6532a79b 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -57,7 +57,6 @@ static int gpiobus_suspend(device_t); static int gpiobus_resume(device_t); static void gpiobus_probe_nomatch(device_t, device_t); static int gpiobus_print_child(device_t, device_t); -static int gpiobus_child_location(device_t, device_t, struct sbuf *); static device_t gpiobus_add_child(device_t, u_int, const char *, int); static void gpiobus_hinted_child(device_t, const char *, int); @@ -662,7 +661,7 @@ gpiobus_print_child(device_t dev, device_t child) return (retval); } -static int +int gpiobus_child_location(device_t bus, device_t child, struct sbuf *sb) { struct gpiobus_ivar *devi; @@ -674,16 +673,19 
@@ gpiobus_child_location(device_t bus, device_t child, struct sbuf *sb) return (0); } -static device_t -gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) +device_t +gpiobus_add_child_common(device_t dev, u_int order, const char *name, int unit, + size_t ivars_size) { device_t child; struct gpiobus_ivar *devi; + KASSERT(ivars_size >= sizeof(struct gpiobus_ivar), + ("child ivars must include gpiobus_ivar as their first member")); child = device_add_child_ordered(dev, order, name, unit); if (child == NULL) return (child); - devi = malloc(sizeof(struct gpiobus_ivar), M_DEVBUF, M_NOWAIT | M_ZERO); + devi = malloc(ivars_size, M_DEVBUF, M_NOWAIT | M_ZERO); if (devi == NULL) { device_delete_child(dev, child); return (NULL); @@ -694,6 +696,13 @@ gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) return (child); } +static device_t +gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) +{ + return (gpiobus_add_child_common(dev, order, name, unit, + sizeof(struct gpiobus_ivar))); +} + static void gpiobus_child_deleted(device_t dev, device_t child) { diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h index de3f57663132..c198e5f79989 100644 --- a/sys/dev/gpio/gpiobus_internal.h +++ b/sys/dev/gpio/gpiobus_internal.h @@ -42,6 +42,8 @@ void gpiobus_free_ivars(struct gpiobus_ivar *); int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); int gpiobus_acquire_pin(device_t, uint32_t); void gpiobus_release_pin(device_t, uint32_t); +int gpiobus_child_location(device_t, device_t, struct sbuf *); +device_t gpiobus_add_child_common(device_t, u_int, const char *, int, size_t); extern driver_t gpiobus_driver; #endif diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c index fc5fb03d6824..b12b78fac18c 100644 --- a/sys/dev/gpio/ofw_gpiobus.c +++ b/sys/dev/gpio/ofw_gpiobus.c @@ -451,28 +451,22 @@ ofw_gpiobus_add_child(device_t dev, u_int order, const char *name, int unit) device_t 
child; struct ofw_gpiobus_devinfo *devi; - child = device_add_child_ordered(dev, order, name, unit); + child = gpiobus_add_child_common(dev, order, name, unit, + sizeof(struct ofw_gpiobus_devinfo)); if (child == NULL) - return (child); - devi = malloc(sizeof(struct ofw_gpiobus_devinfo), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (devi == NULL) { - device_delete_child(dev, child); - return (0); - } + return (NULL); /* * NULL all the OFW-related parts of the ivars for non-OFW * children. */ + devi = device_get_ivars(child); devi->opd_obdinfo.obd_node = -1; devi->opd_obdinfo.obd_name = NULL; devi->opd_obdinfo.obd_compat = NULL; devi->opd_obdinfo.obd_type = NULL; devi->opd_obdinfo.obd_model = NULL; - device_set_ivars(child, devi); - return (child); } diff --git a/sys/dev/hid/hkbd.c b/sys/dev/hid/hkbd.c index 5eff7557bc42..6255c42d3b62 100644 --- a/sys/dev/hid/hkbd.c +++ b/sys/dev/hid/hkbd.c @@ -95,14 +95,16 @@ #ifdef HID_DEBUG static int hkbd_debug = 0; +#endif static int hkbd_no_leds = 0; static SYSCTL_NODE(_hw_hid, OID_AUTO, hkbd, CTLFLAG_RW, 0, "USB keyboard"); +#ifdef HID_DEBUG SYSCTL_INT(_hw_hid_hkbd, OID_AUTO, debug, CTLFLAG_RWTUN, &hkbd_debug, 0, "Debug level"); +#endif SYSCTL_INT(_hw_hid_hkbd, OID_AUTO, no_leds, CTLFLAG_RWTUN, &hkbd_no_leds, 0, "Disables setting of keyboard leds"); -#endif #define INPUT_EPOCH global_epoch_preempt @@ -1596,8 +1598,16 @@ hkbd_ioctl_locked(keyboard_t *kbd, u_long cmd, caddr_t arg) sc->sc_state &= ~LOCK_MASK; sc->sc_state |= *(int *)arg; - /* set LEDs and quit */ - return (hkbd_ioctl_locked(kbd, KDSETLED, arg)); + /* + * Attempt to set the keyboard LEDs; ignore the return value + * intentionally. Note: Some hypervisors/emulators (e.g., QEMU, + * Parallels—at least as of the time of writing) may fail when + * setting LEDs. This can prevent kbdmux from attaching the + * keyboard, which in turn may block the console from accessing + * it. 
+ */ + (void)hkbd_ioctl_locked(kbd, KDSETLED, arg); + return (0); case KDSETREPEAT: /* set keyboard repeat rate (new * interface) */ @@ -1766,10 +1776,8 @@ hkbd_set_leds(struct hkbd_softc *sc, uint8_t leds) SYSCONS_LOCK_ASSERT(); DPRINTF("leds=0x%02x\n", leds); -#ifdef HID_DEBUG if (hkbd_no_leds) return (0); -#endif memset(sc->sc_buffer, 0, HKBD_BUFFER_SIZE); @@ -1820,6 +1828,7 @@ hkbd_set_leds(struct hkbd_softc *sc, uint8_t leds) SYSCONS_UNLOCK(); error = hid_write(sc->sc_dev, buf, len); SYSCONS_LOCK(); + DPRINTF("error %d", error); return (error); } diff --git a/sys/dev/iwx/if_iwx.c b/sys/dev/iwx/if_iwx.c index d60ef1874a6c..1fe531d69933 100644 --- a/sys/dev/iwx/if_iwx.c +++ b/sys/dev/iwx/if_iwx.c @@ -5673,6 +5673,10 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if (rinfo == NULL) return EINVAL; + /* Offloaded sequence number assignment */ + /* Note: Should be done in firmware on all supported devices */ + + /* Radiotap */ if (ieee80211_radiotap_active_vap(vap)) { struct iwx_tx_radiotap_header *tap = &sc->sc_txtap; @@ -5685,6 +5689,7 @@ iwx_tx(struct iwx_softc *sc, struct mbuf *m, struct ieee80211_node *ni) ieee80211_radiotap_tx(vap, m); } + /* Encrypt - CCMP via direct HW path, TKIP/WEP indirected openbsd-style for now */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_get_txkey(ni, m); if (k == NULL) { @@ -10467,6 +10472,8 @@ iwx_attach(device_t dev) IEEE80211_C_BGSCAN /* capable of bg scanning */ ; ic->ic_flags_ext = IEEE80211_FEXT_SCAN_OFFLOAD; + /* Enable seqno offload */ + ic->ic_flags_ext |= IEEE80211_FEXT_SEQNO_OFFLOAD; ic->ic_txstream = 2; ic->ic_rxstream = 2; diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c index 978e5f25ceaf..cc0bc1f3fcd2 100644 --- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c +++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_offload.c @@ -120,7 +120,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, switch (attrs->dir) { case 
IPSEC_DIR_OUTBOUND: - if (attrs->replay_esn.replay_window != 0) + if (attrs->replay_esn.trigger) MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); else MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_MODE); diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index bf14bfdb73ea..9c06f7fec530 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -27,8 +27,9 @@ /* Driver for ptnet paravirtualized network device. */ #include <sys/cdefs.h> +#include "opt_inet.h" +#include "opt_inet6.h" -#include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -75,9 +76,6 @@ #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> -#include "opt_inet.h" -#include "opt_inet6.h" - #include <sys/selinfo.h> #include <net/netmap.h> #include <dev/netmap/netmap_kern.h> diff --git a/sys/dev/virtio/mmio/virtio_mmio.c b/sys/dev/virtio/mmio/virtio_mmio.c index 5a81c8a24779..fe531fced998 100644 --- a/sys/dev/virtio/mmio/virtio_mmio.c +++ b/sys/dev/virtio/mmio/virtio_mmio.c @@ -53,7 +53,6 @@ #include <dev/virtio/virtqueue.h> #include <dev/virtio/mmio/virtio_mmio.h> -#include "virtio_mmio_if.h" #include "virtio_bus_if.h" #include "virtio_if.h" @@ -79,7 +78,6 @@ static int vtmmio_alloc_virtqueues(device_t, int, struct vq_alloc_info *); static int vtmmio_setup_intr(device_t, enum intr_type); static void vtmmio_stop(device_t); -static void vtmmio_poll(device_t); static int vtmmio_reinit(device_t, uint64_t); static void vtmmio_reinit_complete(device_t); static void vtmmio_notify_virtqueue(device_t, uint16_t, bus_size_t); @@ -104,29 +102,11 @@ static void vtmmio_vq_intr(void *); * I/O port read/write wrappers. 
*/ #define vtmmio_write_config_1(sc, o, v) \ -do { \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_PREWRITE(sc->platform, (o), (v)); \ - bus_write_1((sc)->res[0], (o), (v)); \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_NOTE(sc->platform, (o), (v)); \ -} while (0) + bus_write_1((sc)->res[0], (o), (v)) #define vtmmio_write_config_2(sc, o, v) \ -do { \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_PREWRITE(sc->platform, (o), (v)); \ - bus_write_2((sc)->res[0], (o), (v)); \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_NOTE(sc->platform, (o), (v)); \ -} while (0) + bus_write_2((sc)->res[0], (o), (v)) #define vtmmio_write_config_4(sc, o, v) \ -do { \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_PREWRITE(sc->platform, (o), (v)); \ - bus_write_4((sc)->res[0], (o), (v)); \ - if (sc->platform != NULL) \ - VIRTIO_MMIO_NOTE(sc->platform, (o), (v)); \ -} while (0) + bus_write_4((sc)->res[0], (o), (v)) #define vtmmio_read_config_1(sc, o) \ bus_read_1((sc)->res[0], (o)) @@ -157,7 +137,6 @@ static device_method_t vtmmio_methods[] = { DEVMETHOD(virtio_bus_alloc_virtqueues, vtmmio_alloc_virtqueues), DEVMETHOD(virtio_bus_setup_intr, vtmmio_setup_intr), DEVMETHOD(virtio_bus_stop, vtmmio_stop), - DEVMETHOD(virtio_bus_poll, vtmmio_poll), DEVMETHOD(virtio_bus_reinit, vtmmio_reinit), DEVMETHOD(virtio_bus_reinit_complete, vtmmio_reinit_complete), DEVMETHOD(virtio_bus_notify_vq, vtmmio_notify_virtqueue), @@ -220,19 +199,9 @@ vtmmio_setup_intr(device_t dev, enum intr_type type) { struct vtmmio_softc *sc; int rid; - int err; sc = device_get_softc(dev); - if (sc->platform != NULL) { - err = VIRTIO_MMIO_SETUP_INTR(sc->platform, sc->dev, - vtmmio_vq_intr, sc); - if (err == 0) { - /* Okay we have backend-specific interrupts */ - return (0); - } - } - rid = 0; sc->res[1] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); @@ -597,17 +566,6 @@ vtmmio_stop(device_t dev) vtmmio_reset(device_get_softc(dev)); } -static void -vtmmio_poll(device_t dev) -{ - struct vtmmio_softc *sc; - - sc = 
device_get_softc(dev); - - if (sc->platform != NULL) - VIRTIO_MMIO_POLL(sc->platform); -} - static int vtmmio_reinit(device_t dev, uint64_t features) { diff --git a/sys/dev/virtio/mmio/virtio_mmio.h b/sys/dev/virtio/mmio/virtio_mmio.h index ac6a96c1c7fe..edcbf0519acc 100644 --- a/sys/dev/virtio/mmio/virtio_mmio.h +++ b/sys/dev/virtio/mmio/virtio_mmio.h @@ -37,7 +37,6 @@ struct vtmmio_virtqueue; struct vtmmio_softc { device_t dev; - device_t platform; struct resource *res[2]; uint64_t vtmmio_features; diff --git a/sys/dev/virtio/mmio/virtio_mmio_fdt.c b/sys/dev/virtio/mmio/virtio_mmio_fdt.c index 7fba8aad8db8..bb9ea8efbaeb 100644 --- a/sys/dev/virtio/mmio/virtio_mmio_fdt.c +++ b/sys/dev/virtio/mmio/virtio_mmio_fdt.c @@ -63,12 +63,10 @@ #include <dev/virtio/mmio/virtio_mmio.h> static int vtmmio_fdt_probe(device_t); -static int vtmmio_fdt_attach(device_t); static device_method_t vtmmio_fdt_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, vtmmio_fdt_probe), - DEVMETHOD(device_attach, vtmmio_fdt_attach), DEVMETHOD_END }; @@ -93,48 +91,3 @@ vtmmio_fdt_probe(device_t dev) return (vtmmio_probe(dev)); } - -static int -vtmmio_setup_platform(device_t dev, struct vtmmio_softc *sc) -{ - phandle_t platform_node; - struct fdt_ic *ic; - phandle_t xref; - phandle_t node; - - sc->platform = NULL; - - if ((node = ofw_bus_get_node(dev)) == -1) - return (ENXIO); - - if (OF_searchencprop(node, "platform", &xref, - sizeof(xref)) == -1) { - return (ENXIO); - } - - platform_node = OF_node_from_xref(xref); - - SLIST_FOREACH(ic, &fdt_ic_list_head, fdt_ics) { - if (ic->iph == platform_node) { - sc->platform = ic->dev; - break; - } - } - - if (sc->platform == NULL) { - /* No platform-specific device. Ignore it. 
*/ - } - - return (0); -} - -static int -vtmmio_fdt_attach(device_t dev) -{ - struct vtmmio_softc *sc; - - sc = device_get_softc(dev); - vtmmio_setup_platform(dev, sc); - - return (vtmmio_attach(dev)); -} diff --git a/sys/dev/virtio/mmio/virtio_mmio_if.m b/sys/dev/virtio/mmio/virtio_mmio_if.m deleted file mode 100644 index baebbd9a0b1c..000000000000 --- a/sys/dev/virtio/mmio/virtio_mmio_if.m +++ /dev/null @@ -1,99 +0,0 @@ -#- -# Copyright (c) 2014 Ruslan Bukin <br@bsdpad.com> -# All rights reserved. -# -# This software was developed by SRI International and the University of -# Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) -# ("CTSRD"), as part of the DARPA CRASH research programme. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. 
-# -# - -#include <sys/types.h> - -# -# This is optional interface to virtio mmio backend. -# Useful when backend is implemented not by the hardware but software, e.g. -# by using another cpu core. -# - -INTERFACE virtio_mmio; - -CODE { - static int - virtio_mmio_prewrite(device_t dev, size_t offset, int val) - { - - return (1); - } - - static int - virtio_mmio_note(device_t dev, size_t offset, int val) - { - - return (1); - } - - static int - virtio_mmio_setup_intr(device_t dev, device_t mmio_dev, - void *handler, void *ih_user) - { - - return (1); - } -}; - -# -# Inform backend we are going to write data at offset. -# -METHOD int prewrite { - device_t dev; - size_t offset; - int val; -} DEFAULT virtio_mmio_prewrite; - -# -# Inform backend we have data wrotten to offset. -# -METHOD int note { - device_t dev; - size_t offset; - int val; -} DEFAULT virtio_mmio_note; - -# -# Inform backend we are going to poll virtqueue. -# -METHOD int poll { - device_t dev; -}; - -# -# Setup backend-specific interrupts. -# -METHOD int setup_intr { - device_t dev; - device_t mmio_dev; - void *handler; - void *ih_user; -} DEFAULT virtio_mmio_setup_intr; diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 2ff9be9680b8..ecb3dbb370e5 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -28,6 +28,9 @@ /* Driver for VirtIO network devices. 
*/ +#include "opt_inet.h" +#include "opt_inet6.h" + #include <sys/param.h> #include <sys/eventhandler.h> #include <sys/systm.h> @@ -82,9 +85,6 @@ #include <dev/virtio/network/if_vtnetvar.h> #include "virtio_if.h" -#include "opt_inet.h" -#include "opt_inet6.h" - #if defined(INET) || defined(INET6) #include <machine/in_cksum.h> #endif diff --git a/sys/dev/virtio/virtio_bus_if.m b/sys/dev/virtio/virtio_bus_if.m index 57ae90bdc917..4181b641faad 100644 --- a/sys/dev/virtio/virtio_bus_if.m +++ b/sys/dev/virtio/virtio_bus_if.m @@ -109,7 +109,3 @@ METHOD void write_device_config { int len; }; -METHOD void poll { - device_t dev; -}; - diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c index 8cc3326dc08e..cc7a233d60ee 100644 --- a/sys/dev/virtio/virtqueue.c +++ b/sys/dev/virtio/virtqueue.c @@ -605,10 +605,8 @@ virtqueue_poll(struct virtqueue *vq, uint32_t *len) { void *cookie; - VIRTIO_BUS_POLL(vq->vq_dev); while ((cookie = virtqueue_dequeue(vq, len)) == NULL) { cpu_spinwait(); - VIRTIO_BUS_POLL(vq->vq_dev); } return (cookie); diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c index a496b41fcf6e..4a2b80a7ccdd 100644 --- a/sys/fs/cd9660/cd9660_vnops.c +++ b/sys/fs/cd9660/cd9660_vnops.c @@ -191,7 +191,7 @@ cd9660_getattr(struct vop_getattr_args *ap) vap->va_atime = ip->inode.iso_atime; vap->va_mtime = ip->inode.iso_mtime; vap->va_ctime = ip->inode.iso_ctime; - vap->va_rdev = ip->inode.iso_rdev; + vap->va_rdev = VN_ISDEV(vp) ? 
ip->inode.iso_rdev : NODEV; vap->va_size = (u_quad_t) ip->i_size; if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) { diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 5a019b4989cf..4e9aae70da6f 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -1017,7 +1017,7 @@ MALLOC_DECLARE(M_NEWNFSDSESSION); int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, int, int); int newnfs_realign(struct mbuf **, int); bool ncl_pager_setsize(struct vnode *vp, u_quad_t *nsizep); -void ncl_copy_vattr(struct vattr *dst, struct vattr *src); +void ncl_copy_vattr(struct vnode *vp, struct vattr *dst, struct vattr *src); /* * If the port runs on an SMP box that can enforce Atomic ops with low diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 704aeeeabdf2..e9f1dc23ddbe 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -412,7 +412,7 @@ nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap, } void -ncl_copy_vattr(struct vattr *dst, struct vattr *src) +ncl_copy_vattr(struct vnode *vp, struct vattr *dst, struct vattr *src) { dst->va_type = src->va_type; dst->va_mode = src->va_mode; @@ -429,7 +429,7 @@ ncl_copy_vattr(struct vattr *dst, struct vattr *src) dst->va_birthtime = src->va_birthtime; dst->va_gen = src->va_gen; dst->va_flags = src->va_flags; - dst->va_rdev = src->va_rdev; + dst->va_rdev = VN_ISDEV(vp) ? 
src->va_rdev : NODEV; dst->va_bytes = src->va_bytes; dst->va_filerev = src->va_filerev; } @@ -595,7 +595,7 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (vaper != NULL) { - ncl_copy_vattr(vaper, vap); + ncl_copy_vattr(vp, vaper, vap); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 52f72dc43c3f..a8b06fdb261b 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -1026,7 +1026,7 @@ nfs_getattr(struct vop_getattr_args *ap) * cached attributes should be ignored. */ if (nmp->nm_fhsize > 0 && ncl_getattrcache(vp, &vattr) == 0) { - ncl_copy_vattr(vap, &vattr); + ncl_copy_vattr(vp, vap, &vattr); /* * Get the local modify time for the case of a write diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 2690ad3b2679..5a53fac50f2c 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -84,6 +84,13 @@ #define ELF_NOTE_ROUNDSIZE 4 #define OLD_EI_BRAND 8 +/* + * ELF_ABI_NAME is a string name of the ELF ABI. ELF_ABI_ID is used + * to build variable names. 
+ */ +#define ELF_ABI_NAME __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) +#define ELF_ABI_ID __CONCAT(elf, __ELF_WORD_SIZE) + static int __elfN(check_header)(const Elf_Ehdr *hdr); static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0); @@ -104,14 +111,15 @@ static Elf_Word __elfN(untrans_prot)(vm_prot_t); static size_t __elfN(prepare_register_notes)(struct thread *td, struct note_info_list *list, struct thread *target_td); -SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, +SYSCTL_NODE(_kern, OID_AUTO, ELF_ABI_ID, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, ""); +#define ELF_NODE_OID __CONCAT(_kern_, ELF_ABI_ID) + int __elfN(fallback_brand) = -1; -SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, +SYSCTL_INT(ELF_NODE_OID, OID_AUTO, fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); + ELF_ABI_NAME " brand of last resort"); static int elf_legacy_coredump = 0; SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, @@ -126,22 +134,22 @@ int __elfN(nxstack) = #else 0; #endif -SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, +SYSCTL_INT(ELF_NODE_OID, OID_AUTO, nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": support PT_GNU_STACK for non-executable stack control"); + ELF_ABI_NAME ": support PT_GNU_STACK for non-executable stack control"); #if defined(__amd64__) static int __elfN(vdso) = 1; -SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, +SYSCTL_INT(ELF_NODE_OID, OID_AUTO, vdso, CTLFLAG_RWTUN, &__elfN(vdso), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable vdso preloading"); + ELF_ABI_NAME ": enable vdso preloading"); #else static int __elfN(vdso) = 0; #endif #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) int i386_read_exec = 0; -SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, 
&i386_read_exec, 0, +SYSCTL_INT(ELF_NODE_OID, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif @@ -161,15 +169,15 @@ sysctl_pie_base(SYSCTL_HANDLER_ARGS) __elfN(pie_base) = val; return (0); } -SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base, +SYSCTL_PROC(ELF_NODE_OID, OID_AUTO, pie_base, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_pie_base, "LU", "PIE load base without randomization"); -SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, +SYSCTL_NODE(ELF_NODE_OID, OID_AUTO, aslr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, ""); -#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr) +#define ASLR_NODE_OID __CONCAT(ELF_NODE_OID, _aslr) /* * Enable ASLR by default for 64-bit non-PIE binaries. 32-bit architectures @@ -179,8 +187,7 @@ SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, static int __elfN(aslr_enabled) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, &__elfN(aslr_enabled), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) - ": enable address map randomization"); + ELF_ABI_NAME ": enable address map randomization"); /* * Enable ASLR by default for 64-bit PIE binaries. 
@@ -188,8 +195,7 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, static int __elfN(pie_aslr_enabled) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, &__elfN(pie_aslr_enabled), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) - ": enable address map randomization for PIE binaries"); + ELF_ABI_NAME ": enable address map randomization for PIE binaries"); /* * Sbrk is deprecated and it can be assumed that in most cases it will not be @@ -199,27 +205,25 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, static int __elfN(aslr_honor_sbrk) = 0; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, &__elfN(aslr_honor_sbrk), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); + ELF_ABI_NAME ": assume sbrk is used"); static int __elfN(aslr_stack) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN, &__elfN(aslr_stack), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) - ": enable stack address randomization"); + ELF_ABI_NAME ": enable stack address randomization"); static int __elfN(aslr_shared_page) = __ELF_WORD_SIZE == 64; SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, shared_page, CTLFLAG_RWTUN, &__elfN(aslr_shared_page), 0, - __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) - ": enable shared page address randomization"); + ELF_ABI_NAME ": enable shared page address randomization"); static int __elfN(sigfastblock) = 1; -SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock, +SYSCTL_INT(ELF_NODE_OID, OID_AUTO, sigfastblock, CTLFLAG_RWTUN, &__elfN(sigfastblock), 0, "enable sigfastblock for new processes"); static bool __elfN(allow_wx) = true; -SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx, +SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx, CTLFLAG_RWTUN, &__elfN(allow_wx), 0, "Allow pages to be mapped simultaneously writable and executable"); @@ -2951,9 +2955,9 @@ __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, 
*/ static struct execsw __elfN(execsw) = { .ex_imgact = __CONCAT(exec_, __elfN(imgact)), - .ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + .ex_name = ELF_ABI_NAME }; -EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); +EXEC_SET(ELF_ABI_ID, __elfN(execsw)); static vm_prot_t __elfN(trans_prot)(Elf_Word flags) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 0056dac65c7d..19870e989437 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -154,15 +154,12 @@ static struct task unp_defer_task; * and don't really want to reserve the sendspace. Their recvspace should be * large enough for at least one max-size datagram plus address. */ -#ifndef PIPSIZ -#define PIPSIZ 8192 -#endif -static u_long unpst_sendspace = PIPSIZ; -static u_long unpst_recvspace = PIPSIZ; +static u_long unpst_sendspace = 64*1024; +static u_long unpst_recvspace = 64*1024; static u_long unpdg_maxdgram = 8*1024; /* support 8KB syslog msgs */ static u_long unpdg_recvspace = 16*1024; -static u_long unpsp_sendspace = PIPSIZ; -static u_long unpsp_recvspace = PIPSIZ; +static u_long unpsp_sendspace = 64*1024; +static u_long unpsp_recvspace = 64*1024; static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Local domain"); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 93f87ddae4de..a4f41192f684 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -3444,7 +3444,7 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp, dat = NULL; if ((flags & COPY_FILE_RANGE_CLONE) != 0) { - error = ENOSYS; + error = EOPNOTSUPP; goto out; } diff --git a/sys/modules/e6000sw/Makefile b/sys/modules/e6000sw/Makefile index da08f80b0a29..73cbaea801f0 100644 --- a/sys/modules/e6000sw/Makefile +++ b/sys/modules/e6000sw/Makefile @@ -3,6 +3,6 @@ KMOD= e6000sw SRCS= e6000sw.c -SRCS+= bus_if.h etherswitch_if.h mdio_if.h miibus_if.h ofw_bus_if.h opt_platform.h +SRCS+= bus_if.h device_if.h etherswitch_if.h mdio_if.h miibus_if.h ofw_bus_if.h 
opt_platform.h .include <bsd.kmod.mk> diff --git a/sys/modules/etherswitch/Makefile b/sys/modules/etherswitch/Makefile index 087231545cd4..0b16a19e5117 100644 --- a/sys/modules/etherswitch/Makefile +++ b/sys/modules/etherswitch/Makefile @@ -3,7 +3,7 @@ KMOD = etherswitch SRCS= etherswitch.c -SRCS+= mdio_if.h miibus_if.h etherswitch_if.h etherswitch_if.c +SRCS+= bus_if.h device_if.h mdio_if.h miibus_if.h etherswitch_if.h etherswitch_if.c CFLAGS+= -I${SRCTOP}/sys/dev/etherswitch .include <bsd.kmod.mk> diff --git a/sys/modules/evdev/Makefile b/sys/modules/evdev/Makefile index bd66013885db..20813b73f6dd 100644 --- a/sys/modules/evdev/Makefile +++ b/sys/modules/evdev/Makefile @@ -2,7 +2,7 @@ KMOD= evdev SRCS= cdev.c evdev.c evdev_mt.c evdev_utils.c -SRCS+= opt_evdev.h bus_if.h device_if.h +SRCS+= opt_evdev.h opt_kbd.h bus_if.h device_if.h EXPORT_SYMS= YES diff --git a/sys/modules/gpio/gpioaei/Makefile b/sys/modules/gpio/gpioaei/Makefile index 8f856af48eb7..1f0f1d0e53a6 100644 --- a/sys/modules/gpio/gpioaei/Makefile +++ b/sys/modules/gpio/gpioaei/Makefile @@ -10,6 +10,8 @@ SRCS+= \ gpio_if.h \ gpiobus_if.h +SRCS+= opt_acpi.h opt_platform.h + CFLAGS+= -I. 
-I${SRCTOP}/sys/dev/gpio/ .include <bsd.kmod.mk> diff --git a/sys/modules/gve/Makefile b/sys/modules/gve/Makefile index 08b26a994e36..ece275485df7 100644 --- a/sys/modules/gve/Makefile +++ b/sys/modules/gve/Makefile @@ -40,5 +40,5 @@ SRCS= gve_main.c \ gve_tx_dqo.c \ gve_sysctl.c SRCS+= device_if.h bus_if.h pci_if.h - +SRCS+= opt_inet6.h .include <bsd.kmod.mk> diff --git a/sys/modules/if_infiniband/Makefile b/sys/modules/if_infiniband/Makefile index 01e3164b1271..7ec343999da1 100644 --- a/sys/modules/if_infiniband/Makefile +++ b/sys/modules/if_infiniband/Makefile @@ -3,7 +3,8 @@ KMOD= if_infiniband SRCS= if_infiniband.c \ opt_inet.h \ - opt_inet6.h + opt_inet6.h \ + opt_kbd.h EXPORT_SYMS= YES diff --git a/sys/modules/if_vlan/Makefile b/sys/modules/if_vlan/Makefile index 3077f4289d5a..0cdab3f7653a 100644 --- a/sys/modules/if_vlan/Makefile +++ b/sys/modules/if_vlan/Makefile @@ -2,6 +2,6 @@ KMOD= if_vlan SRCS= if_vlan.c -SRCS+= opt_inet.h opt_inet6.h opt_kern_tls.h opt_vlan.h opt_ratelimit.h +SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_vlan.h opt_ratelimit.h .include <bsd.kmod.mk> diff --git a/sys/modules/linux64/Makefile b/sys/modules/linux64/Makefile index b23891a65a4f..327da11afdaf 100644 --- a/sys/modules/linux64/Makefile +++ b/sys/modules/linux64/Makefile @@ -31,6 +31,7 @@ SRCS= linux_dummy_machdep.c \ opt_ktrace.h \ opt_inet6.h \ opt_posix.h \ + opt_usb.h \ bus_if.h \ device_if.h \ vnode_if.h \ diff --git a/sys/modules/md/Makefile b/sys/modules/md/Makefile index 2b0586c44717..3f16e04860a1 100644 --- a/sys/modules/md/Makefile +++ b/sys/modules/md/Makefile @@ -1,6 +1,6 @@ .PATH: ${SRCTOP}/sys/dev/md KMOD= geom_md -SRCS= md.c opt_md.h opt_geom.h opt_rootdevname.h vnode_if.h +SRCS= bus_if.h device_if.h md.c opt_md.h opt_geom.h opt_rootdevname.h vnode_if.h .include <bsd.kmod.mk> diff --git a/sys/modules/miiproxy/Makefile b/sys/modules/miiproxy/Makefile index 5173358989da..730bef4220cd 100644 --- a/sys/modules/miiproxy/Makefile +++ 
b/sys/modules/miiproxy/Makefile @@ -3,7 +3,7 @@ KMOD = miiproxy SRCS= miiproxy.c -SRCS+= mdio_if.h miibus_if.h +SRCS+= bus_if.h mdio_if.h miibus_if.h opt_platform.h CFLAGS+= -I${SRCTOP}/sys/dev/etherswitch .include <bsd.kmod.mk> diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile index 506c045ab0ce..65341fdfb8aa 100644 --- a/sys/modules/mlx5/Makefile +++ b/sys/modules/mlx5/Makefile @@ -46,7 +46,7 @@ mlx5_ipsec_offload.c \ mlx5_ipsec.c \ mlx5_ipsec_rxtx.c SRCS+= ${LINUXKPI_GENSRCS} -SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h +SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h opt_rss.h opt_ratelimit.h CFLAGS+= -I${SRCTOP}/sys/ofed/include CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi diff --git a/sys/modules/mlx5en/Makefile b/sys/modules/mlx5en/Makefile index 03bf174e33b0..3697fa65dc83 100644 --- a/sys/modules/mlx5en/Makefile +++ b/sys/modules/mlx5en/Makefile @@ -15,7 +15,7 @@ mlx5_en_rl.c \ mlx5_en_txrx.c \ mlx5_en_port_buffer.c SRCS+= ${LINUXKPI_GENSRCS} -SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h opt_kern_tls.h +SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h opt_rss.h opt_ratelimit.h opt_kern_tls.h .if defined(HAVE_PER_CQ_EVENT_PACKET) CFLAGS+= -DHAVE_PER_CQ_EVENT_PACKET diff --git a/sys/modules/netgraph/ksocket/Makefile b/sys/modules/netgraph/ksocket/Makefile index 395fdbd7b3e3..7099648f6219 100644 --- a/sys/modules/netgraph/ksocket/Makefile +++ b/sys/modules/netgraph/ksocket/Makefile @@ -1,4 +1,6 @@ KMOD= ng_ksocket SRCS= ng_ksocket.c +SRCS+= opt_inet6.h + .include <bsd.kmod.mk> diff --git a/sys/modules/nvmf/nvmf/Makefile b/sys/modules/nvmf/nvmf/Makefile index 7ebe614998bd..21d73d363d2f 100644 --- a/sys/modules/nvmf/nvmf/Makefile +++ b/sys/modules/nvmf/nvmf/Makefile @@ -10,4 +10,7 @@ SRCS= nvmf.c \ nvmf_qpair.c \ nvmf_sim.c +SRCS+= bus_if.h device_if.h +SRCS+= opt_cam.h + .include <bsd.kmod.mk> diff --git a/sys/modules/qlnx/qlnxev/Makefile b/sys/modules/qlnx/qlnxev/Makefile index ed62f1f1dd40..766a5a950032 100644 --- 
a/sys/modules/qlnx/qlnxev/Makefile +++ b/sys/modules/qlnx/qlnxev/Makefile @@ -49,6 +49,7 @@ SRCS+=ecore_vf.c SRCS+=qlnx_ioctl.c SRCS+=qlnx_os.c +SRCS+=opt_inet.h SRCS+= ${LINUXKPI_GENSRCS} diff --git a/sys/modules/rtw88/Makefile b/sys/modules/rtw88/Makefile index 9739ede11073..822be639da43 100644 --- a/sys/modules/rtw88/Makefile +++ b/sys/modules/rtw88/Makefile @@ -43,6 +43,7 @@ SRCS+= ${LINUXKPI_GENSRCS} SRCS+= opt_wlan.h opt_inet6.h opt_inet.h CFLAGS+= -DKBUILD_MODNAME='"rtw88"' +CFLAGS+= -DLINUXKPI_VERSION=61400 CFLAGS+= -I${DEVRTW88DIR} CFLAGS+= ${LINUXKPI_INCLUDES} diff --git a/sys/modules/rtw89/Makefile b/sys/modules/rtw89/Makefile index 09580f288c62..e66f85c3ac17 100644 --- a/sys/modules/rtw89/Makefile +++ b/sys/modules/rtw89/Makefile @@ -39,6 +39,7 @@ SRCS+= ${LINUXKPI_GENSRCS} SRCS+= opt_wlan.h opt_inet6.h opt_inet.h opt_acpi.h CFLAGS+= -DKBUILD_MODNAME='"rtw89"' +CFLAGS+= -DLINUXKPI_VERSION=61400 CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI CFLAGS+= -I${DEVRTW89DIR} diff --git a/sys/modules/uinput/Makefile b/sys/modules/uinput/Makefile index 66ade2a5bb33..a9e2ec867b91 100644 --- a/sys/modules/uinput/Makefile +++ b/sys/modules/uinput/Makefile @@ -2,6 +2,6 @@ KMOD= uinput SRCS= uinput.c -SRCS+= opt_evdev.h +SRCS+= opt_evdev.h opt_kbd.h .include <bsd.kmod.mk> diff --git a/sys/modules/usb/usie/Makefile b/sys/modules/usb/usie/Makefile index 6a5f79248ff8..9edeed082f8d 100644 --- a/sys/modules/usb/usie/Makefile +++ b/sys/modules/usb/usie/Makefile @@ -29,6 +29,6 @@ KMOD = usie SRCS = if_usie.c SRCS += opt_bus.h opt_usb.h device_if.h bus_if.h \ - usb_if.h usbdevs.h opt_inet.h + usb_if.h usbdevs.h opt_inet.h opt_inet6.h .include <bsd.kmod.mk> diff --git a/sys/modules/usb/wmt/Makefile b/sys/modules/usb/wmt/Makefile index 72cf1d814908..8cb5abd7383e 100644 --- a/sys/modules/usb/wmt/Makefile +++ b/sys/modules/usb/wmt/Makefile @@ -3,6 +3,6 @@ S= ${SRCTOP}/sys .PATH: $S/dev/usb/input KMOD= wmt -SRCS= opt_bus.h opt_usb.h device_if.h bus_if.h usb_if.h usbdevs.h wmt.c +SRCS= 
opt_bus.h opt_kbd.h opt_usb.h device_if.h bus_if.h usb_if.h usbdevs.h wmt.c .include <bsd.kmod.mk> diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile index 2dd9e2be3f56..ec531ed646a7 100644 --- a/sys/modules/zfs/Makefile +++ b/sys/modules/zfs/Makefile @@ -15,6 +15,7 @@ KMOD= zfs ${SRCDIR}/icp/asm-ppc64/sha2 \ ${SRCDIR}/icp/asm-ppc64/blake3 \ ${SRCDIR}/icp/asm-x86_64/blake3 \ + ${SRCDIR}/icp/asm-x86_64/modes \ ${SRCDIR}/icp/asm-x86_64/sha2 \ ${SRCDIR}/os/freebsd/spl \ ${SRCDIR}/os/freebsd/zfs \ @@ -40,7 +41,8 @@ CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS \ .if ${MACHINE_ARCH} == "amd64" CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \ - -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW + -DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL -DHAVE_AVX512BW \ + -DHAVE_VAES -DHAVE_VPCLMULQDQ .endif .if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \ @@ -82,6 +84,9 @@ SRCS+= blake3_avx2.S \ blake3_avx512.S \ blake3_sse2.S \ blake3_sse41.S + +#icp/asm-x86_64/modes +SRCS+= aesni-gcm-avx2-vaes.S .endif #icp/algs/sha2 diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index c79c9eaa1a5f..72167b752e53 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -704,6 +704,11 @@ /* iops->setattr() takes struct user_namespace* */ /* #undef HAVE_USERNS_IOPS_SETATTR */ +#ifdef __amd64__ +/* Define if host toolchain supports VAES */ +#define HAVE_VAES 1 +#endif + /* fops->clone_file_range() is available */ /* #undef HAVE_VFS_CLONE_FILE_RANGE */ @@ -743,6 +748,11 @@ /* __vmalloc page flags exists */ /* #undef HAVE_VMALLOC_PAGE_KERNEL */ +#ifdef __amd64__ +/* Define if host toolchain supports VPCLMULQDQ */ +#define HAVE_VPCLMULQDQ 1 +#endif + /* int (*writepage_t)() takes struct folio* */ /* #undef HAVE_WRITEPAGE_T_FOLIO */ @@ -830,7 +840,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. 
*/ -#define ZFS_META_ALIAS "zfs-2.3.99-539-FreeBSD_g1d0b94c4e" +#define ZFS_META_ALIAS "zfs-2.4.0-rc1-FreeBSD_g00dfa094a" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -839,7 +849,7 @@ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ -#define ZFS_META_KVER_MAX "6.15" +#define ZFS_META_KVER_MAX "6.16" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "4.18" @@ -860,10 +870,10 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "539-FreeBSD_g1d0b94c4e" +#define ZFS_META_RELEASE "zfs-2.4.0-rc1-FreeBSD_g00dfa094a" /* Define the project version. */ -#define ZFS_META_VERSION "2.3.99" +#define ZFS_META_VERSION "2.4.0" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 20fd58c620b5..2b5d717da216 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.3.99-539-g1d0b94c4e" +#define ZFS_META_GITREV "zfs-2.4.0-rc1-0-g00dfa094a" diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index b0f579f688ac..5ed8c19f3128 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -159,7 +159,7 @@ struct ifbreq { uint32_t ifbr_addrexceeded; /* member if addr violations */ ether_vlanid_t ifbr_pvid; /* member if PVID */ uint16_t ifbr_vlanproto; /* member if VLAN protocol */ - uint8_t pad[32]; + uint8_t pad[28]; }; /* BRDGGIFFLAGS, BRDGSIFFLAGS */ diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h index 5a74ffa1cc2f..d780e49af25f 100644 --- a/sys/net/if_clone.h +++ b/sys/net/if_clone.h @@ -153,7 +153,7 @@ int if_clone_destroy(const char *); int if_clone_list(struct if_clonereq *); void if_clone_restoregroup(struct ifnet *); -/* The below interfaces are used only by epair(4). */ +/* The below interfaces are used only by epair(4) and tun(4)/tap(4). 
*/ void if_clone_addif(struct if_clone *, struct ifnet *); int if_clone_destroyif(struct if_clone *, struct ifnet *); diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 1efc220aa8e1..e99df0b85ccf 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -160,8 +160,8 @@ struct pfsync_ins_ack { struct pfsync_upd_c { u_int64_t id; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; u_int32_t creatorid; u_int32_t expire; u_int8_t timeout; diff --git a/sys/net/if_tap.h b/sys/net/if_tap.h index d84cd2eba6f3..8297b8d9e3d2 100644 --- a/sys/net/if_tap.h +++ b/sys/net/if_tap.h @@ -57,6 +57,8 @@ #define TAPGIFNAME TUNGIFNAME #define TAPSVNETHDR _IOW('t', 91, int) #define TAPGVNETHDR _IOR('t', 94, int) +#define TAPSTRANSIENT TUNSTRANSIENT +#define TAPGTRANSIENT TUNGTRANSIENT /* VMware ioctl's */ #define VMIO_SIOCSIFFLAGS _IOWINT('V', 0) diff --git a/sys/net/if_tun.h b/sys/net/if_tun.h index a8fb61db45a2..ccdc25944823 100644 --- a/sys/net/if_tun.h +++ b/sys/net/if_tun.h @@ -43,5 +43,7 @@ struct tuninfo { #define TUNSIFPID _IO('t', 95) #define TUNSIFHEAD _IOW('t', 96, int) #define TUNGIFHEAD _IOR('t', 97, int) +#define TUNSTRANSIENT _IOW('t', 98, int) +#define TUNGTRANSIENT _IOR('t', 99, int) #endif /* !_NET_IF_TUN_H_ */ diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 5e6f65c04b2f..c8dbb6aa8893 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -132,6 +132,7 @@ struct tuntap_softc { #define TUN_DYING 0x0200 #define TUN_L2 0x0400 #define TUN_VMNET 0x0800 +#define TUN_TRANSIENT 0x1000 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) #define TUN_READY (TUN_OPEN | TUN_INITED) @@ -443,6 +444,18 @@ tuntap_name2info(const char *name, int *outunit, int *outflags) return (0); } +static struct if_clone * +tuntap_cloner_from_flags(int tun_flags) +{ + + for (u_int i = 0; i < NDRV; i++) + if ((tun_flags & TUN_DRIVER_IDENT_MASK) == + tuntap_drivers[i].ident_flags) + 
return (V_tuntap_driver_cloners[i]); + + return (NULL); +} + /* * Get driver information from a set of flags specified. Masks the identifying * part of the flags and compares it against all of the available @@ -615,19 +628,39 @@ out: CURVNET_RESTORE(); } -static void -tun_destroy(struct tuntap_softc *tp) +static int +tun_destroy(struct tuntap_softc *tp, bool may_intr) { + int error; TUN_LOCK(tp); + + /* + * Transient tunnels may have set TUN_DYING if we're being destroyed as + * a result of the last close, which we'll allow. + */ + MPASS((tp->tun_flags & (TUN_DYING | TUN_TRANSIENT)) != TUN_DYING); tp->tun_flags |= TUN_DYING; - if (tp->tun_busy != 0) - cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); - else - TUN_UNLOCK(tp); + error = 0; + while (tp->tun_busy != 0) { + if (may_intr) + error = cv_wait_sig(&tp->tun_cv, &tp->tun_mtx); + else + cv_wait(&tp->tun_cv, &tp->tun_mtx); + if (error != 0) { + tp->tun_flags &= ~TUN_DYING; + TUN_UNLOCK(tp); + return (error); + } + } + TUN_UNLOCK(tp); CURVNET_SET(TUN2IFP(tp)->if_vnet); + mtx_lock(&tunmtx); + TAILQ_REMOVE(&tunhead, tp, tun_list); + mtx_unlock(&tunmtx); + /* destroy_dev will take care of any alias. */ destroy_dev(tp->tun_dev); seldrain(&tp->tun_rsel); @@ -648,6 +681,8 @@ tun_destroy(struct tuntap_softc *tp) cv_destroy(&tp->tun_cv); free(tp, M_TUN); CURVNET_RESTORE(); + + return (0); } static int @@ -655,12 +690,7 @@ tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp, uint32_t fla { struct tuntap_softc *tp = ifp->if_softc; - mtx_lock(&tunmtx); - TAILQ_REMOVE(&tunhead, tp, tun_list); - mtx_unlock(&tunmtx); - tun_destroy(tp); - - return (0); + return (tun_destroy(tp, true)); } static void @@ -702,9 +732,9 @@ tun_uninit(const void *unused __unused) mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { - TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); - tun_destroy(tp); + /* tun_destroy() will remove it from the tailq. 
*/ + tun_destroy(tp, false); mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); @@ -1217,6 +1247,23 @@ out: tun_vnethdr_set(ifp, 0); tun_unbusy_locked(tp); + if ((tp->tun_flags & TUN_TRANSIENT) != 0) { + struct if_clone *cloner; + int error __diagused; + + /* Mark it busy so that nothing can re-open it. */ + tp->tun_flags |= TUN_DYING; + TUN_UNLOCK(tp); + + CURVNET_SET_QUIET(ifp->if_home_vnet); + cloner = tuntap_cloner_from_flags(tp->tun_flags); + CURVNET_RESTORE(); + + error = if_clone_destroyif(cloner, ifp); + MPASS(error == 0 || error == EINTR || error == ERESTART); + return; + } + TUN_UNLOCK(tp); } @@ -1668,6 +1715,19 @@ tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, case TUNGDEBUG: *(int *)data = tundebug; break; + case TUNSTRANSIENT: + TUN_LOCK(tp); + if (*(int *)data) + tp->tun_flags |= TUN_TRANSIENT; + else + tp->tun_flags &= ~TUN_TRANSIENT; + TUN_UNLOCK(tp); + break; + case TUNGTRANSIENT: + TUN_LOCK(tp); + *(int *)data = (tp->tun_flags & TUN_TRANSIENT) != 0; + TUN_UNLOCK(tp); + break; case FIONBIO: break; case FIOASYNC: diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 2b43f6f19051..98c59e5de988 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -142,6 +142,7 @@ struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); static void iflib_tqg_detach(if_ctx_t ctx); +static int iflib_simple_transmit(if_t ifp, struct mbuf *m); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; @@ -198,6 +199,7 @@ struct iflib_ctx { uint8_t ifc_sysctl_use_logical_cores; uint16_t ifc_sysctl_extra_msix_vectors; bool ifc_cpus_are_physical_cores; + bool ifc_sysctl_simple_tx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -725,6 +727,7 @@ static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf *iflib_fixup_rx(struct mbuf *m); #endif +static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh); static 
SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); @@ -2624,8 +2627,10 @@ iflib_stop(if_ctx_t ctx) #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); - /* clean any enqueued buffers */ - iflib_ifmp_purge(txq); + if (!ctx->ifc_sysctl_simple_tx) { + /* clean any enqueued buffers */ + iflib_ifmp_purge(txq); + } /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); @@ -3635,13 +3640,16 @@ defrag: * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { - txq->ift_no_desc_avail++; - bus_dmamap_unload(buf_tag, map); - DBG_COUNTER_INC(encap_txq_avail_fail); - DBG_COUNTER_INC(encap_txd_encap_fail); - if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) - GROUPTASK_ENQUEUE(&txq->ift_task); - return (ENOBUFS); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { + txq->ift_no_desc_avail++; + bus_dmamap_unload(buf_tag, map); + DBG_COUNTER_INC(encap_txq_avail_fail); + DBG_COUNTER_INC(encap_txd_encap_fail); + if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) + GROUPTASK_ENQUEUE(&txq->ift_task); + return (ENOBUFS); + } } /* * On Intel cards we can greatly reduce the number of TX interrupts @@ -4014,6 +4022,12 @@ _task_fn_tx(void *context) netmap_tx_irq(ifp, txq->ift_id)) goto skip_ifmp; #endif + if (ctx->ifc_sysctl_simple_tx) { + mtx_lock(&txq->ift_mtx); + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + goto skip_ifmp; + } #ifdef ALTQ if (if_altq_is_enabled(ifp)) iflib_altq_if_start(ifp); @@ -4027,9 +4041,8 @@ _task_fn_tx(void *context) */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); -#ifdef DEV_NETMAP + skip_ifmp: -#endif if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else @@ -5131,7 +5144,14 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; - + if 
(ctx->ifc_sysctl_simple_tx) { +#ifndef ALTQ + if_settransmitfn(ifp, iflib_simple_transmit); + device_printf(dev, "using simple if_transmit\n"); +#else + device_printf(dev, "ALTQ prevents using simple if_transmit\n"); +#endif + } iflib_reset_qvalues(ctx); IFNET_WLOCK(); CTX_LOCK(ctx); @@ -6766,6 +6786,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); + SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx", + CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0, + "use simple tx ring"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); @@ -7088,3 +7111,48 @@ iflib_debugnet_poll(if_t ifp, int count) return (0); } #endif /* DEBUGNET */ + + +static inline iflib_txq_t +iflib_simple_select_queue(if_ctx_t ctx, struct mbuf *m) +{ + int qidx; + + if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) + qidx = QIDX(ctx, m); + else + qidx = NTXQSETS(ctx) + FIRST_QSET(ctx) - 1; + return (&ctx->ifc_txqs[qidx]); +} + +static int +iflib_simple_transmit(if_t ifp, struct mbuf *m) +{ + if_ctx_t ctx; + iflib_txq_t txq; + int error; + int bytes_sent = 0, pkt_sent = 0, mcast_sent = 0; + + + ctx = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (EBUSY); + txq = iflib_simple_select_queue(ctx, m); + mtx_lock(&txq->ift_mtx); + error = iflib_encap(txq, &m); + if (error == 0) { + pkt_sent++; + bytes_sent += m->m_pkthdr.len; + mcast_sent += !!(m->m_flags & M_MCAST); + (void)iflib_txd_db_check(txq, true); + } + (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); + mtx_unlock(&txq->ift_mtx); + if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); + if (mcast_sent) + if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); + + return (error); +} diff --git 
a/sys/net/pfvar.h b/sys/net/pfvar.h index c397f0b67896..d6c13470f2eb 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1020,7 +1020,7 @@ struct pf_state_scrub_export { #define PF_SCRUB_FLAG_VALID 0x01 uint8_t scrub_flag; uint32_t pfss_ts_mod; /* timestamp modulation */ -}; +} __packed; struct pf_state_key_export { struct pf_addr addr[2]; @@ -1037,7 +1037,7 @@ struct pf_state_peer_export { uint8_t state; /* active state level */ uint8_t wscale; /* window scaling factor */ uint8_t dummy[6]; -}; +} __packed; _Static_assert(sizeof(struct pf_state_peer_export) == 32, "size incorrect"); struct pf_state_export { @@ -1179,26 +1179,6 @@ struct pf_test_ctx { * Unified state structures for pulling states out of the kernel * used by pfsync(4) and the pf(4) ioctl. */ -struct pfsync_state_scrub { - u_int16_t pfss_flags; - u_int8_t pfss_ttl; /* stashed TTL */ -#define PFSYNC_SCRUB_FLAG_VALID 0x01 - u_int8_t scrub_flag; - u_int32_t pfss_ts_mod; /* timestamp modulation */ -} __packed; - -struct pfsync_state_peer { - struct pfsync_state_scrub scrub; /* state is scrubbed */ - u_int32_t seqlo; /* Max sequence number sent */ - u_int32_t seqhi; /* Max the other end ACKd + win */ - u_int32_t seqdiff; /* Sequence number modulator */ - u_int16_t max_win; /* largest window (pre scaling) */ - u_int16_t mss; /* Maximum segment size option */ - u_int8_t state; /* active state level */ - u_int8_t wscale; /* window scaling factor */ - u_int8_t pad[6]; -} __packed; - struct pfsync_state_key { struct pf_addr addr[2]; u_int16_t port[2]; @@ -1208,8 +1188,8 @@ struct pfsync_state_1301 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; - struct pfsync_state_peer src; - struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; @@ -1235,8 +1215,8 @@ struct pfsync_state_1400 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; - struct pfsync_state_peer src; - 
struct pfsync_state_peer dst; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; @@ -1323,39 +1303,10 @@ extern pflog_packet_t *pflog_packet_ptr; /* for copies to/from network byte order */ /* ioctl interface also uses network byte order */ -#define pf_state_peer_hton(s,d) do { \ - (d)->seqlo = htonl((s)->seqlo); \ - (d)->seqhi = htonl((s)->seqhi); \ - (d)->seqdiff = htonl((s)->seqdiff); \ - (d)->max_win = htons((s)->max_win); \ - (d)->mss = htons((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub) { \ - (d)->scrub.pfss_flags = \ - htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ - (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ - (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ - } \ -} while (0) - -#define pf_state_peer_ntoh(s,d) do { \ - (d)->seqlo = ntohl((s)->seqlo); \ - (d)->seqhi = ntohl((s)->seqhi); \ - (d)->seqdiff = ntohl((s)->seqdiff); \ - (d)->max_win = ntohs((s)->max_win); \ - (d)->mss = ntohs((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ - (d)->scrub != NULL) { \ - (d)->scrub->pfss_flags = \ - ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ - (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ - } \ -} while (0) +void pf_state_peer_hton(const struct pf_state_peer *, + struct pf_state_peer_export *); +void pf_state_peer_ntoh(const struct pf_state_peer_export *, + struct pf_state_peer *); #define pf_state_counter_hton(s,d) do { \ d[0] = htonl((s>>32)&0xffffffff); \ diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h index 141b13f9f740..3684fba52c5c 100644 --- a/sys/net80211/ieee80211_freebsd.h +++ b/sys/net80211/ieee80211_freebsd.h @@ -93,12 +93,22 @@ typedef struct { } while (0) #define 
IEEE80211_TX_LOCK_OBJ(_ic) (&(_ic)->ic_txlock.mtx) #define IEEE80211_TX_LOCK_DESTROY(_ic) mtx_destroy(IEEE80211_TX_LOCK_OBJ(_ic)) -#define IEEE80211_TX_LOCK(_ic) mtx_lock(IEEE80211_TX_LOCK_OBJ(_ic)) -#define IEEE80211_TX_UNLOCK(_ic) mtx_unlock(IEEE80211_TX_LOCK_OBJ(_ic)) -#define IEEE80211_TX_LOCK_ASSERT(_ic) \ - mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_OWNED) -#define IEEE80211_TX_UNLOCK_ASSERT(_ic) \ - mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_NOTOWNED) +#define IEEE80211_TX_LOCK(_ic) do { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_lock(IEEE80211_TX_LOCK_OBJ(_ic)); \ + } while (0); +#define IEEE80211_TX_UNLOCK(_ic) do { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_unlock(IEEE80211_TX_LOCK_OBJ(_ic)); \ + } while (0); +#define IEEE80211_TX_LOCK_ASSERT(_ic) do { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_OWNED); \ + } while (0) +#define IEEE80211_TX_UNLOCK_ASSERT(_ic) { \ + if (!IEEE80211_CONF_SEQNO_OFFLOAD(_ic)) \ + mtx_assert(IEEE80211_TX_LOCK_OBJ(_ic), MA_NOTOWNED); \ + } while (0) /* * Stageq / ni_tx_superg lock diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index afe83ea0805c..57fe687adffe 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -974,7 +974,7 @@ ieee80211_send_setup( /* NB: zero out i_seq field (for s/w encryption etc) */ *(uint16_t *)&wh->i_seq[0] = 0; - } else + } else if (!IEEE80211_CONF_SEQNO_OFFLOAD(ni->ni_ic)) ieee80211_output_seqno_assign(ni, tid, m); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) @@ -1810,7 +1810,8 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, * and we don't need the TX lock held. 
*/ if ((m->m_flags & M_AMPDU_MPDU) == 0) { - ieee80211_output_seqno_assign(ni, tid, m); + if (!IEEE80211_CONF_SEQNO_OFFLOAD(ic)) + ieee80211_output_seqno_assign(ni, tid, m); } else { /* * NB: don't assign a sequence # to potential @@ -1828,7 +1829,9 @@ ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, *(uint16_t *)wh->i_seq = 0; } } else { - ieee80211_output_seqno_assign(ni, IEEE80211_NONQOS_TID, m); + if (!IEEE80211_CONF_SEQNO_OFFLOAD(ic)) + ieee80211_output_seqno_assign(ni, IEEE80211_NONQOS_TID, + m); /* * XXX TODO: we shouldn't allow EAPOL, etc that would * be forced to be non-QoS traffic to be A-MSDU encapsulated. @@ -3856,6 +3859,8 @@ ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) * If the driver identifies it does its own TX seqno management then * we can skip this (and still not do the TX seqno.) */ + + /* TODO: IEEE80211_CONF_SEQNO_OFFLOAD() */ ieee80211_output_beacon_seqno_assign(ni, m); /* XXX faster to recalculate entirely or just changes? */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 6492495dc583..ec7102223c2d 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -2562,299 +2562,272 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, hhook_run_tcp_est_in(tp, th, &to); #endif - if (SEQ_LEQ(th->th_ack, tp->snd_una)) { - maxseg = tcp_maxseg(tp); - if (no_data && - (tiwin == tp->snd_wnd || - (tp->t_flags & TF_SACK_PERMIT))) { + if (SEQ_LT(th->th_ack, tp->snd_una)) { + /* This is old ACK information, don't process it. */ + break; + } + if (th->th_ack == tp->snd_una) { + /* Check if this is a duplicate ACK. */ + if ((tp->t_flags & TF_SACK_PERMIT) && + V_tcp_do_newsack) { /* - * If this is the first time we've seen a - * FIN from the remote, this is not a - * duplicate and it needs to be processed - * normally. This happens during a - * simultaneous close. 
+ * If SEG.ACK == SND.UNA, RFC 6675 requires a + * duplicate ACK to selectively acknowledge + * at least one byte, which was not selectively + * acknowledged before. */ - if ((thflags & TH_FIN) && - (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { - tp->t_dupacks = 0; + if (sack_changed == SACK_NOCHANGE) { break; } - TCPSTAT_INC(tcps_rcvdupack); - /* - * If we have outstanding data (other than - * a window probe), this is a completely - * duplicate ack (ie, window info didn't - * change and FIN isn't set), - * the ack is the biggest we've - * seen and we've seen exactly our rexmt - * threshold of them, assume a packet - * has been dropped and retransmit it. - * Kludge snd_nxt & the congestion - * window so we send only this one - * packet. - * - * We know we're losing at the current - * window size so do congestion avoidance - * (set ssthresh to half the current window - * and pull our congestion window back to - * the new ssthresh). - * - * Dup acks mean that packets have left the - * network (they're now cached at the receiver) - * so bump cwnd by the amount in the receiver - * to keep a constant cwnd packets in the - * network. - * - * When using TCP ECN, notify the peer that - * we reduced the cwnd. - */ + } else { /* - * Following 2 kinds of acks should not affect - * dupack counting: - * 1) Old acks - * 2) Acks with SACK but without any new SACK - * information in them. These could result from - * any anomaly in the network like a switch - * duplicating packets or a possible DoS attack. + * If SEG.ACK == SND.UNA, RFC 5681 requires a + * duplicate ACK to have no data on it and to + * not be a window update. 
*/ - if (th->th_ack != tp->snd_una || - (tcp_is_sack_recovery(tp, &to) && - (sack_changed == SACK_NOCHANGE))) { + if (!no_data || tiwin != tp->snd_wnd) { break; - } else if (!tcp_timer_active(tp, TT_REXMT)) { - tp->t_dupacks = 0; - } else if (++tp->t_dupacks > tcprexmtthresh || - IN_FASTRECOVERY(tp->t_flags)) { - cc_ack_received(tp, th, nsegs, - CC_DUPACK); - if (V_tcp_do_prr && + } + } + /* + * If this is the first time we've seen a + * FIN from the remote, this is not a + * duplicate ACK and it needs to be processed + * normally. + * This happens during a simultaneous close. + */ + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + tp->t_dupacks = 0; + break; + } + /* Perform duplicate ACK processing. */ + TCPSTAT_INC(tcps_rcvdupack); + maxseg = tcp_maxseg(tp); + if (!tcp_timer_active(tp, TT_REXMT)) { + tp->t_dupacks = 0; + } else if (++tp->t_dupacks > tcprexmtthresh || + IN_FASTRECOVERY(tp->t_flags)) { + cc_ack_received(tp, th, nsegs, CC_DUPACK); + if (V_tcp_do_prr && + IN_FASTRECOVERY(tp->t_flags) && + (tp->t_flags & TF_SACK_PERMIT)) { + tcp_do_prr_ack(tp, th, &to, + sack_changed, &maxseg); + } else if (tcp_is_sack_recovery(tp, &to) && IN_FASTRECOVERY(tp->t_flags) && - (tp->t_flags & TF_SACK_PERMIT)) { - tcp_do_prr_ack(tp, th, &to, - sack_changed, &maxseg); - } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags) && - (tp->snd_nxt == tp->snd_max)) { - int awnd; + (tp->snd_nxt == tp->snd_max)) { + int awnd; - /* - * Compute the amount of data in flight first. - * We can inject new data into the pipe iff - * we have less than ssthresh - * worth of data in flight. 
- */ - awnd = tcp_compute_pipe(tp); - if (awnd < tp->snd_ssthresh) { - tp->snd_cwnd += imax(maxseg, - imin(2 * maxseg, - tp->sackhint.delivered_data)); - if (tp->snd_cwnd > tp->snd_ssthresh) - tp->snd_cwnd = tp->snd_ssthresh; - } - } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags) && - SEQ_LT(tp->snd_nxt, tp->snd_max)) { + /* + * Compute the amount of data in flight first. + * We can inject new data into the pipe iff + * we have less than ssthresh + * worth of data in flight. + */ + awnd = tcp_compute_pipe(tp); + if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += imax(maxseg, imin(2 * maxseg, tp->sackhint.delivered_data)); - } else { - tp->snd_cwnd += maxseg; + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; } - (void) tcp_output(tp); - goto drop; - } else if (tp->t_dupacks == tcprexmtthresh || - (tp->t_flags & TF_SACK_PERMIT && - V_tcp_do_newsack && - tp->sackhint.sacked_bytes > - (tcprexmtthresh - 1) * maxseg)) { + } else if (tcp_is_sack_recovery(tp, &to) && + IN_FASTRECOVERY(tp->t_flags) && + SEQ_LT(tp->snd_nxt, tp->snd_max)) { + tp->snd_cwnd += imax(maxseg, + imin(2 * maxseg, + tp->sackhint.delivered_data)); + } else { + tp->snd_cwnd += maxseg; + } + (void) tcp_output(tp); + goto drop; + } else if (tp->t_dupacks == tcprexmtthresh || + (tp->t_flags & TF_SACK_PERMIT && + V_tcp_do_newsack && + tp->sackhint.sacked_bytes > + (tcprexmtthresh - 1) * maxseg)) { enter_recovery: - /* - * Above is the RFC6675 trigger condition of - * more than (dupthresh-1)*maxseg sacked data. - * If the count of holes in the - * scoreboard is >= dupthresh, we could - * also enter loss recovery, but don't - * have that value readily available. - */ - tp->t_dupacks = tcprexmtthresh; - tcp_seq onxt = tp->snd_nxt; + /* + * Above is the RFC6675 trigger condition of + * more than (dupthresh-1)*maxseg sacked data. 
+ * If the count of holes in the + * scoreboard is >= dupthresh, we could + * also enter loss recovery, but don't + * have that value readily available. + */ + tp->t_dupacks = tcprexmtthresh; + tcp_seq onxt = tp->snd_nxt; - /* - * If we're doing sack, check to - * see if we're already in sack - * recovery. If we're not doing sack, - * check to see if we're in newreno - * recovery. - */ - if (tcp_is_sack_recovery(tp, &to)) { - if (IN_FASTRECOVERY(tp->t_flags)) { - tp->t_dupacks = 0; - break; - } - } else { - if (SEQ_LEQ(th->th_ack, - tp->snd_recover)) { - tp->t_dupacks = 0; - break; - } + /* + * If we're doing sack, check to + * see if we're already in sack + * recovery. If we're not doing sack, + * check to see if we're in newreno + * recovery. + */ + if (tcp_is_sack_recovery(tp, &to)) { + if (IN_FASTRECOVERY(tp->t_flags)) { + tp->t_dupacks = 0; + break; } - /* Congestion signal before ack. */ - cc_cong_signal(tp, th, CC_NDUPACK); - cc_ack_received(tp, th, nsegs, - CC_DUPACK); - tcp_timer_activate(tp, TT_REXMT, 0); - tp->t_rtttime = 0; - if (V_tcp_do_prr) { - /* - * snd_ssthresh and snd_recover are - * already updated by cc_cong_signal. - */ - if (tcp_is_sack_recovery(tp, &to)) { - /* - * Include Limited Transmit - * segments here - */ - tp->sackhint.prr_delivered = - imin(tp->snd_max - th->th_ack, - (tp->snd_limited + 1) * maxseg); - } else { - tp->sackhint.prr_delivered = - maxseg; - } - tp->sackhint.recover_fs = max(1, - tp->snd_nxt - tp->snd_una); + } else { + if (SEQ_LEQ(th->th_ack, + tp->snd_recover)) { + tp->t_dupacks = 0; + break; } - tp->snd_limited = 0; + } + /* Congestion signal before ack. */ + cc_cong_signal(tp, th, CC_NDUPACK); + cc_ack_received(tp, th, nsegs, CC_DUPACK); + tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + if (V_tcp_do_prr) { + /* + * snd_ssthresh and snd_recover are + * already updated by cc_cong_signal. 
+ */ if (tcp_is_sack_recovery(tp, &to)) { - TCPSTAT_INC(tcps_sack_recovery_episode); /* - * When entering LR after RTO due to - * Duplicate ACKs, retransmit existing - * holes from the scoreboard. + * Include Limited Transmit + * segments here */ - tcp_resend_sackholes(tp); - /* Avoid inflating cwnd in tcp_output */ - tp->snd_nxt = tp->snd_max; - tp->snd_cwnd = tcp_compute_pipe(tp) + + tp->sackhint.prr_delivered = + imin(tp->snd_max - th->th_ack, + (tp->snd_limited + 1) * maxseg); + } else { + tp->sackhint.prr_delivered = maxseg; - (void) tcp_output(tp); - /* Set cwnd to the expected flightsize */ - tp->snd_cwnd = tp->snd_ssthresh; - if (SEQ_GT(th->th_ack, tp->snd_una)) { - goto resume_partialack; - } - goto drop; } - tp->snd_nxt = th->th_ack; - tp->snd_cwnd = maxseg; - (void) tcp_output(tp); - KASSERT(tp->snd_limited <= 2, - ("%s: tp->snd_limited too big", - __func__)); - tp->snd_cwnd = tp->snd_ssthresh + - maxseg * - (tp->t_dupacks - tp->snd_limited); - if (SEQ_GT(onxt, tp->snd_nxt)) - tp->snd_nxt = onxt; - goto drop; - } else if (V_tcp_do_rfc3042) { - /* - * Process first and second duplicate - * ACKs. Each indicates a segment - * leaving the network, creating room - * for more. Make sure we can send a - * packet on reception of each duplicate - * ACK by increasing snd_cwnd by one - * segment. Restore the original - * snd_cwnd after packet transmission. 
- */ - cc_ack_received(tp, th, nsegs, CC_DUPACK); - uint32_t oldcwnd = tp->snd_cwnd; - tcp_seq oldsndmax = tp->snd_max; - u_int sent; - int avail; - - KASSERT(tp->t_dupacks == 1 || - tp->t_dupacks == 2, - ("%s: dupacks not 1 or 2", - __func__)); - if (tp->t_dupacks == 1) - tp->snd_limited = 0; - if ((tp->snd_nxt == tp->snd_max) && - (tp->t_rxtshift == 0)) - tp->snd_cwnd = - SEQ_SUB(tp->snd_nxt, - tp->snd_una) - - tcp_sack_adjust(tp); - tp->snd_cwnd += - (tp->t_dupacks - tp->snd_limited) * - maxseg - tcp_sack_adjust(tp); + tp->sackhint.recover_fs = max(1, + tp->snd_nxt - tp->snd_una); + } + tp->snd_limited = 0; + if (tcp_is_sack_recovery(tp, &to)) { + TCPSTAT_INC(tcps_sack_recovery_episode); /* - * Only call tcp_output when there - * is new data available to be sent - * or we need to send an ACK. + * When entering LR after RTO due to + * Duplicate ACKs, retransmit existing + * holes from the scoreboard. */ - SOCK_SENDBUF_LOCK(so); - avail = sbavail(&so->so_snd); - SOCK_SENDBUF_UNLOCK(so); - if (tp->t_flags & TF_ACKNOW || - (avail >= - SEQ_SUB(tp->snd_nxt, tp->snd_una))) { - (void) tcp_output(tp); - } - sent = SEQ_SUB(tp->snd_max, oldsndmax); - if (sent > maxseg) { - KASSERT((tp->t_dupacks == 2 && - tp->snd_limited == 0) || - (sent == maxseg + 1 && - tp->t_flags & TF_SENTFIN) || - (sent < 2 * maxseg && - tp->t_flags & TF_NODELAY), - ("%s: sent too much: %u>%u", - __func__, sent, maxseg)); - tp->snd_limited = 2; - } else if (sent > 0) { - ++tp->snd_limited; - } - tp->snd_cwnd = oldcwnd; + tcp_resend_sackholes(tp); + /* Avoid inflating cwnd in tcp_output */ + tp->snd_nxt = tp->snd_max; + tp->snd_cwnd = tcp_compute_pipe(tp) + + maxseg; + (void) tcp_output(tp); + /* Set cwnd to the expected flightsize */ + tp->snd_cwnd = tp->snd_ssthresh; goto drop; } - } - break; - } else { - /* - * This ack is advancing the left edge, reset the - * counter. - */ - tp->t_dupacks = 0; - /* - * If this ack also has new SACK info, increment the - * counter as per rfc6675. 
The variable - * sack_changed tracks all changes to the SACK - * scoreboard, including when partial ACKs without - * SACK options are received, and clear the scoreboard - * from the left side. Such partial ACKs should not be - * counted as dupacks here. - */ - if (tcp_is_sack_recovery(tp, &to) && - (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || - ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && - (tp->snd_nxt == tp->snd_max)) { - tp->t_dupacks++; - /* limit overhead by setting maxseg last */ - if (!IN_FASTRECOVERY(tp->t_flags) && - (tp->sackhint.sacked_bytes > - ((tcprexmtthresh - 1) * - (maxseg = tcp_maxseg(tp))))) { - goto enter_recovery; + tp->snd_nxt = th->th_ack; + tp->snd_cwnd = maxseg; + (void) tcp_output(tp); + KASSERT(tp->snd_limited <= 2, + ("%s: tp->snd_limited too big", + __func__)); + tp->snd_cwnd = tp->snd_ssthresh + + maxseg * + (tp->t_dupacks - tp->snd_limited); + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (V_tcp_do_rfc3042) { + /* + * Process first and second duplicate + * ACKs. Each indicates a segment + * leaving the network, creating room + * for more. Make sure we can send a + * packet on reception of each duplicate + * ACK by increasing snd_cwnd by one + * segment. Restore the original + * snd_cwnd after packet transmission. + */ + cc_ack_received(tp, th, nsegs, CC_DUPACK); + uint32_t oldcwnd = tp->snd_cwnd; + tcp_seq oldsndmax = tp->snd_max; + u_int sent; + int avail; + + KASSERT(tp->t_dupacks == 1 || + tp->t_dupacks == 2, + ("%s: dupacks not 1 or 2", + __func__)); + if (tp->t_dupacks == 1) + tp->snd_limited = 0; + if ((tp->snd_nxt == tp->snd_max) && + (tp->t_rxtshift == 0)) + tp->snd_cwnd = + SEQ_SUB(tp->snd_nxt, + tp->snd_una) - + tcp_sack_adjust(tp); + tp->snd_cwnd += + (tp->t_dupacks - tp->snd_limited) * + maxseg - tcp_sack_adjust(tp); + /* + * Only call tcp_output when there + * is new data available to be sent + * or we need to send an ACK. 
+ */ + SOCK_SENDBUF_LOCK(so); + avail = sbavail(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); + if (tp->t_flags & TF_ACKNOW || + (avail >= + SEQ_SUB(tp->snd_nxt, tp->snd_una))) { + (void) tcp_output(tp); + } + sent = SEQ_SUB(tp->snd_max, oldsndmax); + if (sent > maxseg) { + KASSERT((tp->t_dupacks == 2 && + tp->snd_limited == 0) || + (sent == maxseg + 1 && + tp->t_flags & TF_SENTFIN) || + (sent < 2 * maxseg && + tp->t_flags & TF_NODELAY), + ("%s: sent too much: %u>%u", + __func__, sent, maxseg)); + tp->snd_limited = 2; + } else if (sent > 0) { + ++tp->snd_limited; } + tp->snd_cwnd = oldcwnd; + goto drop; } + break; } - -resume_partialack: KASSERT(SEQ_GT(th->th_ack, tp->snd_una), - ("%s: th_ack <= snd_una", __func__)); - + ("%s: SEQ_LEQ(th_ack, snd_una)", __func__)); + /* + * This ack is advancing the left edge, reset the + * counter. + */ + tp->t_dupacks = 0; + /* + * If this ack also has new SACK info, increment the + * t_dupacks as per RFC 6675. The variable + * sack_changed tracks all changes to the SACK + * scoreboard, including when partial ACKs without + * SACK options are received, and clear the scoreboard + * from the left side. Such partial ACKs should not be + * counted as dupacks here. + */ + if (V_tcp_do_newsack && + tcp_is_sack_recovery(tp, &to) && + (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || + ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && + (tp->snd_nxt == tp->snd_max)) { + tp->t_dupacks++; + /* limit overhead by setting maxseg last */ + if (!IN_FASTRECOVERY(tp->t_flags) && + (tp->sackhint.sacked_bytes > + (tcprexmtthresh - 1) * (maxseg = tcp_maxseg(tp)))) { + goto enter_recovery; + } + } /* * If the congestion window was inflated to account * for the other side's cached packets, retract it. 
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c index e7908d6f3a44..2391d8ea752c 100644 --- a/sys/netlink/netlink_io.c +++ b/sys/netlink/netlink_io.c @@ -216,16 +216,17 @@ nl_send(struct nl_writer *nw, struct nlpcb *nlp) hdr->nlmsg_len); } - if (nlp->nl_linux && linux_netlink_p != NULL && - __predict_false(!linux_netlink_p->msgs_to_linux(nw, nlp))) { + if (nlp->nl_linux && linux_netlink_p != NULL) { + nb = linux_netlink_p->msgs_to_linux(nw->buf, nlp); nl_buf_free(nw->buf); nw->buf = NULL; - return (false); + if (nb == NULL) + return (false); + } else { + nb = nw->buf; + nw->buf = NULL; } - nb = nw->buf; - nw->buf = NULL; - SOCK_RECVBUF_LOCK(so); if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) { SOCK_RECVBUF_UNLOCK(so); diff --git a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h index d4c451d470b2..794065692901 100644 --- a/sys/netlink/netlink_linux.h +++ b/sys/netlink/netlink_linux.h @@ -37,7 +37,7 @@ struct nlpcb; struct nl_pstate; struct nl_writer; -typedef bool msgs_to_linux_cb_t(struct nl_writer *nw, struct nlpcb *nlp); +typedef struct nl_buf * msgs_to_linux_cb_t(struct nl_buf *, struct nlpcb *); typedef int msg_from_linux_cb_t(int netlink_family, struct nlmsghdr **hdr, struct nl_pstate *npt); diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index 8b871576d0b2..9beb80792af4 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -403,6 +403,7 @@ static const struct nlattr_parser nla_p_linfo[] = { NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo); static const struct nlattr_parser nla_p_if[] = { + { .type = IFLA_ADDRESS, .off = _OUT(ifla_address), .cb = nlattr_get_nla }, { .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, { .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 }, { .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 }, diff --git a/sys/netlink/route/iface_drivers.c b/sys/netlink/route/iface_drivers.c index 
5f605b05f7b8..21db3017df18 100644 --- a/sys/netlink/route/iface_drivers.c +++ b/sys/netlink/route/iface_drivers.c @@ -82,12 +82,9 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } } - if ((lattrs->ifi_change & IFF_UP) != 0 || lattrs->ifi_change == 0) { - /* Request to up or down the interface */ - if (lattrs->ifi_flags & IFF_UP) - if_up(ifp); - else - if_down(ifp); + if ((lattrs->ifi_change & IFF_UP) && (lattrs->ifi_flags & IFF_UP) == 0) { + /* Request to down the interface */ + if_down(ifp); } if (lattrs->ifla_mtu > 0) { @@ -100,8 +97,7 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } } - if ((lattrs->ifi_change & IFF_PROMISC) != 0 || - lattrs->ifi_change == 0) { + if (lattrs->ifi_change & IFF_PROMISC) { error = ifpromisc(ifp, lattrs->ifi_flags & IFF_PROMISC); if (error != 0) { nlmsg_report_err_msg(npt, "unable to set promisc"); @@ -109,6 +105,24 @@ _nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, } } + if (lattrs->ifla_address != NULL) { + if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMAC)) { + error = if_setlladdr(ifp, + NLA_DATA(lattrs->ifla_address), + NLA_DATA_LEN(lattrs->ifla_address)); + if (error != 0) { + nlmsg_report_err_msg(npt, + "setting IFLA_ADDRESS failed with error code: %d", + error); + return (error); + } + } else { + nlmsg_report_err_msg(npt, + "Not enough privileges to set IFLA_ADDRESS"); + return (EPERM); + } + } + return (0); } diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h index b84b34461e35..41f110038b54 100644 --- a/sys/netlink/route/route_var.h +++ b/sys/netlink/route/route_var.h @@ -69,6 +69,7 @@ struct nl_parsed_link { char *ifla_cloner; char *ifla_ifalias; struct nlattr *ifla_idata; + struct nlattr *ifla_address; unsigned short ifi_type; int ifi_index; uint32_t ifla_link; diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index e34c08c8c4db..585c196391c0 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ 
b/sys/netpfil/pf/if_pfsync.c @@ -123,8 +123,8 @@ union inet_template { sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) -static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, - struct pfsync_state_peer *); +static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, + struct pf_state_peer_export *); static int pfsync_in_clr(struct mbuf *, int, int, int, int); static int pfsync_in_ins(struct mbuf *, int, int, int, int); static int pfsync_in_iack(struct mbuf *, int, int, int, int); @@ -330,7 +330,7 @@ SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); -static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, +static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); @@ -502,7 +502,7 @@ pfsync_clone_destroy(struct ifnet *ifp) } static int -pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, +pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { @@ -1172,8 +1172,8 @@ pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) } static int -pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, - struct pfsync_state_peer *dst) +pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, + struct pf_state_peer_export *dst) { int sync = 0; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 9d83e7b82e6f..8cd4fff95b15 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -2069,6 +2069,44 @@ pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir) return (false); } +void +pf_state_peer_hton(const struct pf_state_peer *s, struct pf_state_peer_export *d) +{ + d->seqlo = htonl(s->seqlo); + d->seqhi 
= htonl(s->seqhi); + d->seqdiff = htonl(s->seqdiff); + d->max_win = htons(s->max_win); + d->mss = htons(s->mss); + d->state = s->state; + d->wscale = s->wscale; + if (s->scrub) { + d->scrub.pfss_flags = htons( + s->scrub->pfss_flags & PFSS_TIMESTAMP); + d->scrub.pfss_ttl = (s)->scrub->pfss_ttl; + d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod); + d->scrub.scrub_flag = PF_SCRUB_FLAG_VALID; + } +} + +void +pf_state_peer_ntoh(const struct pf_state_peer_export *s, struct pf_state_peer *d) +{ + d->seqlo = ntohl(s->seqlo); + d->seqhi = ntohl(s->seqhi); + d->seqdiff = ntohl(s->seqdiff); + d->max_win = ntohs(s->max_win); + d->mss = ntohs(s->mss); + d->state = s->state; + d->wscale = s->wscale; + if (s->scrub.scrub_flag == PF_SCRUB_FLAG_VALID && + d->scrub != NULL) { + d->scrub->pfss_flags = ntohs(s->scrub.pfss_flags) & + PFSS_TIMESTAMP; + d->scrub->pfss_ttl = s->scrub.pfss_ttl; + d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod); + } +} + struct pf_udp_mapping * pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port, struct pf_addr *nat_addr, uint16_t nat_port) diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 09754359ec2d..45b5b8dd5fef 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -118,7 +118,7 @@ dump_state_peer(struct nl_writer *nw, int attr, const struct pf_state_peer *peer nlattr_add_u16(nw, PF_STP_PFSS_FLAGS, pfss_flags); nlattr_add_u32(nw, PF_STP_PFSS_TS_MOD, sc->pfss_ts_mod); nlattr_add_u8(nw, PF_STP_PFSS_TTL, sc->pfss_ttl); - nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PFSYNC_SCRUB_FLAG_VALID); + nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PF_SCRUB_FLAG_VALID); } nlattr_set_len(nw, off); diff --git a/sys/riscv/starfive/jh7110_pcie.c b/sys/riscv/starfive/jh7110_pcie.c index 2d0a4be69b2c..5181252ab2dc 100644 --- a/sys/riscv/starfive/jh7110_pcie.c +++ b/sys/riscv/starfive/jh7110_pcie.c @@ -483,6 +483,16 @@ jh7110_pcie_msi_enable_intr(device_t dev, struct intr_irqsrc *isrc) } static void 
+jh7110_pcie_msi_post_filter(device_t dev, struct intr_irqsrc *isrc) +{ +} + +static void +jh7110_pcie_msi_post_ithread(device_t dev, struct intr_irqsrc *isrc) +{ +} + +static void jh7110_pcie_msi_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct jh7110_pcie_softc *sc; @@ -1008,6 +1018,8 @@ static device_method_t jh7110_pcie_methods[] = { /* Interrupt controller interface */ DEVMETHOD(pic_enable_intr, jh7110_pcie_msi_enable_intr), DEVMETHOD(pic_disable_intr, jh7110_pcie_msi_disable_intr), + DEVMETHOD(pic_post_filter, jh7110_pcie_msi_post_filter), + DEVMETHOD(pic_post_ithread, jh7110_pcie_msi_post_ithread), DEVMETHOD(pic_pre_ithread, jh7110_pcie_msi_pre_ithread), /* OFW bus interface */ diff --git a/sys/security/mac_do/mac_do.c b/sys/security/mac_do/mac_do.c index 8856be5fa1a3..7a5ac2e01f75 100644 --- a/sys/security/mac_do/mac_do.c +++ b/sys/security/mac_do/mac_do.c @@ -44,7 +44,7 @@ SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN, &print_parse_error, 0, "Print parse errors on setting rules " "(via sysctl(8))."); -static MALLOC_DEFINE(M_DO, "do_rule", "Rules for mac_do"); +static MALLOC_DEFINE(M_MAC_DO, "mac_do", "mac_do(4) security module"); #define MAC_RULE_STRING_LEN 1024 @@ -319,17 +319,17 @@ toast_rules(struct rules *const rules) struct rule *rule, *rule_next; STAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) { - free(rule->uids, M_DO); - free(rule->gids, M_DO); - free(rule, M_DO); + free(rule->uids, M_MAC_DO); + free(rule->gids, M_MAC_DO); + free(rule, M_MAC_DO); } - free(rules, M_DO); + free(rules, M_MAC_DO); } static struct rules * alloc_rules(void) { - struct rules *const rules = malloc(sizeof(*rules), M_DO, M_WAITOK); + struct rules *const rules = malloc(sizeof(*rules), M_MAC_DO, M_WAITOK); _Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!"); rules->string[0] = 0; @@ -433,7 +433,7 @@ static void make_parse_error(struct parse_error **const parse_error, const size_t pos, const char *const fmt, ...) 
{ - struct parse_error *const err = malloc(sizeof(*err), M_DO, M_WAITOK); + struct parse_error *const err = malloc(sizeof(*err), M_MAC_DO, M_WAITOK); va_list ap; err->pos = pos; @@ -448,7 +448,7 @@ make_parse_error(struct parse_error **const parse_error, const size_t pos, static void free_parse_error(struct parse_error *const parse_error) { - free(parse_error, M_DO); + free(parse_error, M_MAC_DO); } static int @@ -733,7 +733,7 @@ parse_target_clause(char *to, struct rule *const rule, "Too many target clauses of type '%s'.", to_type); return (EOVERFLOW); } - ie = malloc(sizeof(*ie), M_DO, M_WAITOK); + ie = malloc(sizeof(*ie), M_MAC_DO, M_WAITOK); ie->spec = is; STAILQ_INSERT_TAIL(list, ie, ie_entries); check_type_and_id_spec(type, &is); @@ -784,7 +784,7 @@ pour_list_into_rule(const id_type_t type, struct id_list *const list, STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) { MPASS(idx < *nb); array[idx] = ie->spec; - free(ie, M_DO); + free(ie, M_MAC_DO); ++idx; } MPASS(idx == *nb); @@ -874,7 +874,7 @@ parse_single_rule(char *rule, struct rules *const rules, STAILQ_INIT(&gid_list); /* Freed when the 'struct rules' container is freed. */ - new = malloc(sizeof(*new), M_DO, M_WAITOK | M_ZERO); + new = malloc(sizeof(*new), M_MAC_DO, M_WAITOK | M_ZERO); from_type = strsep_noblanks(&rule, "="); MPASS(from_type != NULL); /* Because 'rule' was not NULL. 
*/ @@ -933,7 +933,7 @@ parse_single_rule(char *rule, struct rules *const rules, } while (to_list != NULL); if (new->uids_nb != 0) { - new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_DO, + new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_MAC_DO, M_WAITOK); error = pour_list_into_rule(IT_UID, &uid_list, new->uids, &new->uids_nb, parse_error); @@ -949,7 +949,7 @@ parse_single_rule(char *rule, struct rules *const rules, } if (new->gids_nb != 0) { - new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_DO, + new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_MAC_DO, M_WAITOK); error = pour_list_into_rule(IT_GID, &gid_list, new->gids, &new->gids_nb, parse_error); @@ -969,13 +969,13 @@ parse_single_rule(char *rule, struct rules *const rules, return (0); einval: - free(new->gids, M_DO); - free(new->uids, M_DO); - free(new, M_DO); + free(new->gids, M_MAC_DO); + free(new->uids, M_MAC_DO); + free(new, M_MAC_DO); STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next) - free(ie, M_DO); + free(ie, M_MAC_DO); STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next) - free(ie, M_DO); + free(ie, M_MAC_DO); MPASS(*parse_error != NULL); return (EINVAL); } @@ -1028,7 +1028,7 @@ parse_rules(const char *const string, struct rules **const rulesp, bcopy(string, rules->string, len + 1); MPASS(rules->string[len] == '\0'); /* Catch some races. */ - copy = malloc(len + 1, M_DO, M_WAITOK); + copy = malloc(len + 1, M_MAC_DO, M_WAITOK); bcopy(string, copy, len + 1); MPASS(copy[len] == '\0'); /* Catch some races. 
*/ @@ -1046,7 +1046,7 @@ parse_rules(const char *const string, struct rules **const rulesp, *rulesp = rules; out: - free(copy, M_DO); + free(copy, M_MAC_DO); return (error); } @@ -1226,7 +1226,7 @@ parse_and_set_rules(struct prison *const pr, const char *rules_string, static int mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS) { - char *const buf = malloc(MAC_RULE_STRING_LEN, M_DO, M_WAITOK); + char *const buf = malloc(MAC_RULE_STRING_LEN, M_MAC_DO, M_WAITOK); struct prison *const td_pr = req->td->td_ucred->cr_prison; struct prison *pr; struct rules *rules; @@ -1250,7 +1250,7 @@ mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS) free_parse_error(parse_error); } out: - free(buf, M_DO); + free(buf, M_MAC_DO); return (error); } @@ -1573,7 +1573,7 @@ set_data_header(void *const data, const size_t size, const int priv, static void * alloc_data(void *const data, const size_t size) { - struct mac_do_data_header *const hdr = realloc(data, size, M_DO, + struct mac_do_data_header *const hdr = realloc(data, size, M_MAC_DO, M_WAITOK); MPASS(size >= sizeof(struct mac_do_data_header)); @@ -1602,7 +1602,7 @@ alloc_data(void *const data, const size_t size) static void dealloc_thread_osd(void *const value) { - free(value, M_DO); + free(value, M_MAC_DO); } /* diff --git a/sys/tools/amd64_ia32_vdso.sh b/sys/tools/amd64_ia32_vdso.sh index 85d2299b45d0..e5865639d398 100644 --- a/sys/tools/amd64_ia32_vdso.sh +++ b/sys/tools/amd64_ia32_vdso.sh @@ -58,7 +58,7 @@ then exit 1 fi -${CC} ${DEBUG} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \ +${CC} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \ -o elf-vdso32.so.o -I. 
-I"${S}" -include opt_global.h \ -DVDSO_NAME=elf_vdso32_so_1 -DVDSO_FILE=\"elf-vdso32.so.1\" \ "${S}"/tools/vdso_wrap.S diff --git a/sys/tools/amd64_vdso.sh b/sys/tools/amd64_vdso.sh index 2a83ae874ab7..ed91ddc8abb5 100644 --- a/sys/tools/amd64_vdso.sh +++ b/sys/tools/amd64_vdso.sh @@ -67,7 +67,7 @@ then exit 1 fi -${CC} ${DEBUG} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \ +${CC} -x assembler-with-cpp -DLOCORE -fPIC -nostdinc -c \ -o elf-vdso.so.o -I. -I"${S}" -include opt_global.h \ -DVDSO_NAME=elf_vdso_so_1 -DVDSO_FILE=\"elf-vdso.so.1\" \ "${S}"/tools/vdso_wrap.S |