aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Moench-Tegeder <cmt@FreeBSD.org>2024-06-03 19:10:23 +0000
committerChristoph Moench-Tegeder <cmt@FreeBSD.org>2024-06-03 19:10:23 +0000
commit3d1ed4d0293a165d1c3bce922acc4d74c7b5d785 (patch)
treebf9ae96f17767222abf1bc7476035f4d9a65b2b7
parent464f4f18d8a3566ac2c174708bea456777291ab0 (diff)
downloadports-3d1ed4d0293a165d1c3bce922acc4d74c7b5d785.tar.gz
ports-3d1ed4d0293a165d1c3bce922acc4d74c7b5d785.zip
www/firefox: update to 127.0 (rc1)
-rw-r--r--www/firefox/Makefile6
-rw-r--r--www/firefox/distinfo6
-rw-r--r--www/firefox/files/patch-libwebrtc-generated221
-rw-r--r--www/firefox/files/patch-rust-1.78.03562
4 files changed, 210 insertions, 3585 deletions
diff --git a/www/firefox/Makefile b/www/firefox/Makefile
index 919d4732b06b..5933b037f60f 100644
--- a/www/firefox/Makefile
+++ b/www/firefox/Makefile
@@ -1,5 +1,5 @@
PORTNAME= firefox
-DISTVERSION= 126.0.2
+DISTVERSION= 127.0
PORTEPOCH= 2
CATEGORIES= www wayland
MASTER_SITES= MOZILLA/${PORTNAME}/releases/${DISTVERSION}${DISTVERSIONSUFFIX}/source \
@@ -11,10 +11,10 @@ COMMENT= Web browser based on the browser portion of Mozilla
WWW= https://www.mozilla.com/firefox
BUILD_DEPENDS= nspr>=4.32:devel/nspr \
- nss>=3.99:security/nss \
+ nss>=3.100:security/nss \
icu>=73.1:devel/icu \
libevent>=2.1.8:devel/libevent \
- harfbuzz>=8.3.1:print/harfbuzz \
+ harfbuzz>=8.4.0:print/harfbuzz \
graphite2>=1.3.14:graphics/graphite2 \
png>=1.6.43:graphics/png \
dav1d>=1.0.0:multimedia/dav1d \
diff --git a/www/firefox/distinfo b/www/firefox/distinfo
index e979ebfc4324..9d9a821fc3be 100644
--- a/www/firefox/distinfo
+++ b/www/firefox/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1716920634
-SHA256 (firefox-126.0.2.source.tar.xz) = 778e08e6756ffceb9ff9ef4429d06f86e2d27249fa65f2be8852359cbe041298
-SIZE (firefox-126.0.2.source.tar.xz) = 560050432
+TIMESTAMP = 1717433155
+SHA256 (firefox-127.0.source.tar.xz) = d2a7bda2a72859c7013ad0a3434e1489db161566ba14365999aea364f6106d40
+SIZE (firefox-127.0.source.tar.xz) = 555156968
diff --git a/www/firefox/files/patch-libwebrtc-generated b/www/firefox/files/patch-libwebrtc-generated
index a56d291ae041..4230b31cd227 100644
--- a/www/firefox/files/patch-libwebrtc-generated
+++ b/www/firefox/files/patch-libwebrtc-generated
@@ -1,7 +1,7 @@
-commit e0f8f58bed7cc4cd65c82ef0ea0e795ac6eeda65
+commit 1fb2333e2d53204e69a318a8c803981d8ded116b
Author: Christoph Moench-Tegeder <cmt@FreeBSD.org>
- regenerate FreeBSD libwebrtc patch for Firefox 126
+ regenerate FreeBSD libwebrtc patch for Firefox 127
diff --git third_party/libwebrtc/api/adaptation/resource_adaptation_api_gn/moz.build third_party/libwebrtc/api/adaptation/resource_adaptation_api_gn/moz.build
index b0671b5d4adb..642f867db664 100644
@@ -14108,6 +14108,193 @@ index 8c04d40cf594..e9f40adf5dcc 100644
+ DEFINES["USE_X11"] = "1"
Library("task_queue_gn")
+diff --git third_party/libwebrtc/api/transport/bandwidth_estimation_settings_gn/moz.build third_party/libwebrtc/api/transport/bandwidth_estimation_settings_gn/moz.build
+index 10e3f8f8356e..56aa757e1b6c 100644
+--- third_party/libwebrtc/api/transport/bandwidth_estimation_settings_gn/moz.build
++++ third_party/libwebrtc/api/transport/bandwidth_estimation_settings_gn/moz.build
+@@ -12,11 +12,21 @@ AllowCompilerWarnings()
+ DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+ DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+ DEFINES["RTC_ENABLE_VP9"] = True
++DEFINES["USE_GLIB"] = "1"
++DEFINES["USE_OZONE"] = "1"
++DEFINES["WEBRTC_BSD"] = True
++DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True
+ DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+ DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+ DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+ DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
++DEFINES["WEBRTC_POSIX"] = True
+ DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
++DEFINES["_FILE_OFFSET_BITS"] = "64"
++DEFINES["_LARGEFILE64_SOURCE"] = True
++DEFINES["_LARGEFILE_SOURCE"] = True
++DEFINES["__STDC_CONSTANT_MACROS"] = True
++DEFINES["__STDC_FORMAT_MACROS"] = True
+
+ FINAL_LIBRARY = "webrtc"
+
+@@ -39,107 +49,17 @@ if not CONFIG["MOZ_DEBUG"]:
+ if CONFIG["MOZ_DEBUG"] == "1":
+
+ DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+-
+-if CONFIG["OS_TARGET"] == "Android":
+-
+- DEFINES["ANDROID"] = True
+- DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+- DEFINES["HAVE_SYS_UIO_H"] = True
+- DEFINES["WEBRTC_ANDROID"] = True
+- DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+- DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True
+- DEFINES["WEBRTC_LINUX"] = True
+- DEFINES["WEBRTC_POSIX"] = True
+- DEFINES["_GNU_SOURCE"] = True
+- DEFINES["__STDC_CONSTANT_MACROS"] = True
+- DEFINES["__STDC_FORMAT_MACROS"] = True
+-
+-if CONFIG["OS_TARGET"] == "Darwin":
+-
+- DEFINES["WEBRTC_MAC"] = True
+- DEFINES["WEBRTC_POSIX"] = True
+- DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+- DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+- DEFINES["__STDC_CONSTANT_MACROS"] = True
+- DEFINES["__STDC_FORMAT_MACROS"] = True
+-
+-if CONFIG["OS_TARGET"] == "Linux":
+-
+- DEFINES["USE_AURA"] = "1"
+- DEFINES["USE_GLIB"] = "1"
+- DEFINES["USE_NSS_CERTS"] = "1"
+- DEFINES["USE_OZONE"] = "1"
+- DEFINES["USE_UDEV"] = True
+- DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True
+- DEFINES["WEBRTC_LINUX"] = True
+- DEFINES["WEBRTC_POSIX"] = True
+- DEFINES["_FILE_OFFSET_BITS"] = "64"
+- DEFINES["_LARGEFILE64_SOURCE"] = True
+- DEFINES["_LARGEFILE_SOURCE"] = True
+- DEFINES["__STDC_CONSTANT_MACROS"] = True
+- DEFINES["__STDC_FORMAT_MACROS"] = True
+-
+-if CONFIG["OS_TARGET"] == "OpenBSD":
+-
+- DEFINES["USE_GLIB"] = "1"
+- DEFINES["USE_OZONE"] = "1"
+- DEFINES["USE_X11"] = "1"
+- DEFINES["WEBRTC_BSD"] = True
+- DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True
+- DEFINES["WEBRTC_POSIX"] = True
+- DEFINES["_FILE_OFFSET_BITS"] = "64"
+- DEFINES["_LARGEFILE64_SOURCE"] = True
+- DEFINES["_LARGEFILE_SOURCE"] = True
+- DEFINES["__STDC_CONSTANT_MACROS"] = True
+- DEFINES["__STDC_FORMAT_MACROS"] = True
+-
+-if CONFIG["OS_TARGET"] == "WINNT":
+-
+- DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+- DEFINES["NOMINMAX"] = True
+- DEFINES["NTDDI_VERSION"] = "0x0A000000"
+- DEFINES["PSAPI_VERSION"] = "2"
+- DEFINES["RTC_ENABLE_WIN_WGC"] = True
+- DEFINES["UNICODE"] = True
+- DEFINES["USE_AURA"] = "1"
+- DEFINES["WEBRTC_WIN"] = True
+- DEFINES["WIN32"] = True
+- DEFINES["WIN32_LEAN_AND_MEAN"] = True
+- DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+- DEFINES["WINVER"] = "0x0A00"
+- DEFINES["_ATL_NO_OPENGL"] = True
+- DEFINES["_CRT_RAND_S"] = True
+- DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+- DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+- DEFINES["_HAS_EXCEPTIONS"] = "0"
+- DEFINES["_HAS_NODISCARD"] = True
+- DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+- DEFINES["_SECURE_ATL"] = True
+- DEFINES["_UNICODE"] = True
+- DEFINES["_WIN32_WINNT"] = "0x0A00"
+- DEFINES["_WINDOWS"] = True
+- DEFINES["__STD_C"] = True
++ DEFINES["_DEBUG"] = True
+
+ if CONFIG["TARGET_CPU"] == "aarch64":
+
+ DEFINES["WEBRTC_ARCH_ARM64"] = True
+ DEFINES["WEBRTC_HAS_NEON"] = True
+
+-if CONFIG["TARGET_CPU"] == "arm":
+-
+- DEFINES["WEBRTC_ARCH_ARM"] = True
+- DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+- DEFINES["WEBRTC_HAS_NEON"] = True
+-
+ if CONFIG["TARGET_CPU"] == "mips32":
+
+ DEFINES["MIPS32_LE"] = True
+ DEFINES["MIPS_FPU_LE"] = True
+- DEFINES["_GNU_SOURCE"] = True
+-
+-if CONFIG["TARGET_CPU"] == "mips64":
+-
+- DEFINES["_GNU_SOURCE"] = True
+
+ if CONFIG["TARGET_CPU"] == "x86":
+
+@@ -149,50 +69,8 @@ if CONFIG["TARGET_CPU"] == "x86_64":
+
+ DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+-if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+-
+- DEFINES["_DEBUG"] = True
+-
+-if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+-
+- DEFINES["_DEBUG"] = True
+-
+-if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+-
+- DEFINES["_DEBUG"] = True
+-
+-if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+-
+- DEFINES["_DEBUG"] = True
+-
+-if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+-
+- DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+-
+-if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
++if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "FreeBSD":
+
+ DEFINES["USE_X11"] = "1"
+
+-if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm":
+-
+- OS_LIBS += [
+- "unwind"
+- ]
+-
+-if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64":
+-
+- DEFINES["_GNU_SOURCE"] = True
+-
+-if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "arm":
+-
+- DEFINES["_GNU_SOURCE"] = True
+-
+-if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86":
+-
+- DEFINES["_GNU_SOURCE"] = True
+-
+-if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64":
+-
+- DEFINES["_GNU_SOURCE"] = True
+-
+ Library("bandwidth_estimation_settings_gn")
diff --git third_party/libwebrtc/api/transport/bitrate_settings_gn/moz.build third_party/libwebrtc/api/transport/bitrate_settings_gn/moz.build
index 2b732ca51e9d..716e91c361d1 100644
--- third_party/libwebrtc/api/transport/bitrate_settings_gn/moz.build
@@ -21584,7 +21771,7 @@ index 491c4880cc46..874d12b31e06 100644
Library("scalability_mode_gn")
diff --git third_party/libwebrtc/api/video_codecs/video_codecs_api_gn/moz.build third_party/libwebrtc/api/video_codecs/video_codecs_api_gn/moz.build
-index c6c127e5b607..c866cb6d800f 100644
+index 13a1c027cfba..56d51a419490 100644
--- third_party/libwebrtc/api/video_codecs/video_codecs_api_gn/moz.build
+++ third_party/libwebrtc/api/video_codecs/video_codecs_api_gn/moz.build
@@ -12,11 +12,21 @@ AllowCompilerWarnings()
@@ -21609,7 +21796,7 @@ index c6c127e5b607..c866cb6d800f 100644
FINAL_LIBRARY = "webrtc"
-@@ -53,185 +63,32 @@ if not CONFIG["MOZ_DEBUG"]:
+@@ -54,185 +64,32 @@ if not CONFIG["MOZ_DEBUG"]:
if CONFIG["MOZ_DEBUG"] == "1":
DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
@@ -36669,7 +36856,7 @@ index b884cb8d99d1..cac379b6b793 100644
Library("legacy_encoded_audio_frame_gn")
diff --git third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build
-index 834a8d1265d4..57d0a614a954 100644
+index 9b2996fa22d9..3ac873404dd5 100644
--- third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build
+++ third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build
@@ -12,11 +12,21 @@ AllowCompilerWarnings()
@@ -36694,7 +36881,7 @@ index 834a8d1265d4..57d0a614a954 100644
FINAL_LIBRARY = "webrtc"
-@@ -78,189 +88,32 @@ if not CONFIG["MOZ_DEBUG"]:
+@@ -77,189 +87,32 @@ if not CONFIG["MOZ_DEBUG"]:
if CONFIG["MOZ_DEBUG"] == "1":
DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
@@ -64082,7 +64269,7 @@ index 76c4cfe66496..40909203b3cf 100644
Library("timing_module_gn")
diff --git third_party/libwebrtc/modules/video_coding/video_codec_interface_gn/moz.build third_party/libwebrtc/modules/video_coding/video_codec_interface_gn/moz.build
-index 141def90908b..fae7dffcca41 100644
+index c0d139fc6d0a..b367c42eb7a8 100644
--- third_party/libwebrtc/modules/video_coding/video_codec_interface_gn/moz.build
+++ third_party/libwebrtc/modules/video_coding/video_codec_interface_gn/moz.build
@@ -12,11 +12,21 @@ AllowCompilerWarnings()
@@ -64107,7 +64294,7 @@ index 141def90908b..fae7dffcca41 100644
FINAL_LIBRARY = "webrtc"
-@@ -44,185 +54,32 @@ if not CONFIG["MOZ_DEBUG"]:
+@@ -45,185 +55,32 @@ if not CONFIG["MOZ_DEBUG"]:
if CONFIG["MOZ_DEBUG"] == "1":
DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
@@ -66060,10 +66247,10 @@ index 883e5c70b24b..caec155ca861 100644
Library("webrtc_vp9_helpers_gn")
diff --git third_party/libwebrtc/moz.build third_party/libwebrtc/moz.build
-index 59472bdc9b56..45b99de39516 100644
+index 68c4cd93c417..9d93a43355ec 100644
--- third_party/libwebrtc/moz.build
+++ third_party/libwebrtc/moz.build
-@@ -266,6 +266,8 @@ DIRS += [
+@@ -267,6 +267,8 @@ DIRS += [
"/third_party/libwebrtc/modules/congestion_controller/goog_cc/send_side_bwe_gn",
"/third_party/libwebrtc/modules/congestion_controller/rtp/control_handler_gn",
"/third_party/libwebrtc/modules/congestion_controller/rtp/transport_feedback_gn",
@@ -66072,7 +66259,7 @@ index 59472bdc9b56..45b99de39516 100644
"/third_party/libwebrtc/modules/module_api_gn",
"/third_party/libwebrtc/modules/module_api_public_gn",
"/third_party/libwebrtc/modules/module_fec_api_gn",
-@@ -465,102 +467,11 @@ DIRS += [
+@@ -466,102 +468,11 @@ DIRS += [
"/third_party/libwebrtc/webrtc_gn"
]
@@ -66176,7 +66363,7 @@ index 59472bdc9b56..45b99de39516 100644
"/third_party/libwebrtc/modules/portal/portal_gn",
"/third_party/libwebrtc/third_party/drm/drm_gn",
"/third_party/libwebrtc/third_party/gbm/gbm_gn",
-@@ -568,26 +479,9 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64":
+@@ -569,26 +480,9 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64":
"/third_party/libwebrtc/third_party/pipewire/pipewire_gn"
]
@@ -66204,7 +66391,7 @@ index 59472bdc9b56..45b99de39516 100644
"/third_party/libwebrtc/modules/portal/portal_gn",
"/third_party/libwebrtc/third_party/drm/drm_gn",
"/third_party/libwebrtc/third_party/gbm/gbm_gn",
-@@ -595,11 +489,9 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "mips32":
+@@ -596,11 +490,9 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "mips32":
"/third_party/libwebrtc/third_party/pipewire/pipewire_gn"
]
@@ -66217,7 +66404,7 @@ index 59472bdc9b56..45b99de39516 100644
"/third_party/libwebrtc/modules/portal/portal_gn",
"/third_party/libwebrtc/third_party/drm/drm_gn",
"/third_party/libwebrtc/third_party/gbm/gbm_gn",
-@@ -607,7 +499,7 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "mips64":
+@@ -608,7 +500,7 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "mips64":
"/third_party/libwebrtc/third_party/pipewire/pipewire_gn"
]
@@ -66226,7 +66413,7 @@ index 59472bdc9b56..45b99de39516 100644
DIRS += [
"/third_party/libwebrtc/common_audio/common_audio_avx2_gn",
-@@ -615,8 +507,6 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86":
+@@ -616,8 +508,6 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86":
"/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn",
"/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn",
"/third_party/libwebrtc/modules/desktop_capture/desktop_capture_differ_sse2_gn",
@@ -66235,7 +66422,7 @@ index 59472bdc9b56..45b99de39516 100644
"/third_party/libwebrtc/modules/portal/portal_gn",
"/third_party/libwebrtc/third_party/drm/drm_gn",
"/third_party/libwebrtc/third_party/gbm/gbm_gn",
-@@ -624,7 +514,7 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86":
+@@ -625,7 +515,7 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86":
"/third_party/libwebrtc/third_party/pipewire/pipewire_gn"
]
@@ -66244,7 +66431,7 @@ index 59472bdc9b56..45b99de39516 100644
DIRS += [
"/third_party/libwebrtc/common_audio/common_audio_avx2_gn",
-@@ -632,69 +522,9 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64":
+@@ -633,69 +523,9 @@ if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64":
"/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn",
"/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn",
"/third_party/libwebrtc/modules/desktop_capture/desktop_capture_differ_sse2_gn",
diff --git a/www/firefox/files/patch-rust-1.78.0 b/www/firefox/files/patch-rust-1.78.0
deleted file mode 100644
index ebad1becefa5..000000000000
--- a/www/firefox/files/patch-rust-1.78.0
+++ /dev/null
@@ -1,3562 +0,0 @@
-
-# HG changeset patch
-# User Henri Sivonen <hsivonen@hsivonen.fi>
-# Date 1714462184 0
-# Node ID 1db2ef126a6a8555dbf50345e16492c977b42e92
-# Parent a545e84b3674c4878f2e618b7bce23058f2ac690
-Bug 1882209 - Update encoding_rs to 0.8.34 to deal with rustc changes. r=glandium,supply-chain-reviewers
-
-Differential Revision: https://phabricator.services.mozilla.com/D207167
-
-diff --git a/.cargo/config.toml.in b/.cargo/config.toml.in
---- .cargo/config.toml.in
-+++ .cargo/config.toml.in
-@@ -35,16 +35,21 @@ git = "https://github.com/gfx-rs/wgpu"
- rev = "2b0e3ed01cfcc4bcccc7fd63b2581b260c00b089"
- replace-with = "vendored-sources"
-
- [source."git+https://github.com/glandium/mio?rev=9a2ef335c366044ffe73b1c4acabe50a1daefe05"]
- git = "https://github.com/glandium/mio"
- rev = "9a2ef335c366044ffe73b1c4acabe50a1daefe05"
- replace-with = "vendored-sources"
-
-+[source."git+https://github.com/hsivonen/any_all_workaround?rev=7fb1b7034c9f172aade21ee1c8554e8d8a48af80"]
-+git = "https://github.com/hsivonen/any_all_workaround"
-+rev = "7fb1b7034c9f172aade21ee1c8554e8d8a48af80"
-+replace-with = "vendored-sources"
-+
- [source."git+https://github.com/hsivonen/chardetng?rev=3484d3e3ebdc8931493aa5df4d7ee9360a90e76b"]
- git = "https://github.com/hsivonen/chardetng"
- rev = "3484d3e3ebdc8931493aa5df4d7ee9360a90e76b"
- replace-with = "vendored-sources"
-
- [source."git+https://github.com/hsivonen/chardetng_c?rev=ed8a4c6f900a90d4dbc1d64b856e61490a1c3570"]
- git = "https://github.com/hsivonen/chardetng_c"
- rev = "ed8a4c6f900a90d4dbc1d64b856e61490a1c3570"
-diff --git a/Cargo.lock b/Cargo.lock
---- Cargo.lock
-+++ Cargo.lock
-@@ -87,16 +87,25 @@ dependencies = [
-
- [[package]]
- name = "anstyle"
- version = "1.0.3"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46"
-
- [[package]]
-+name = "any_all_workaround"
-+version = "0.1.0"
-+source = "git+https://github.com/hsivonen/any_all_workaround?rev=7fb1b7034c9f172aade21ee1c8554e8d8a48af80#7fb1b7034c9f172aade21ee1c8554e8d8a48af80"
-+dependencies = [
-+ "cfg-if 1.0.0",
-+ "version_check",
-+]
-+
-+[[package]]
- name = "anyhow"
- version = "1.0.69"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
-
- [[package]]
- name = "app_services_logger"
- version = "0.1.0"
-@@ -1571,22 +1580,22 @@ dependencies = [
- "encoding_rs",
- "nserror",
- "nsstring",
- "xmldecl",
- ]
-
- [[package]]
- name = "encoding_rs"
--version = "0.8.33"
--source = "registry+https://github.com/rust-lang/crates.io-index"
--checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
--dependencies = [
-+version = "0.8.34"
-+source = "registry+https://github.com/rust-lang/crates.io-index"
-+checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
-+dependencies = [
-+ "any_all_workaround",
- "cfg-if 1.0.0",
-- "packed_simd",
- ]
-
- [[package]]
- name = "enum-map"
- version = "2.7.3"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9"
- dependencies = [
-@@ -4321,26 +4330,16 @@ checksum = "8d91edf4fbb970279443471345a4
- name = "oxilangtag-ffi"
- version = "0.1.0"
- dependencies = [
- "nsstring",
- "oxilangtag",
- ]
-
- [[package]]
--name = "packed_simd"
--version = "0.3.9"
--source = "registry+https://github.com/rust-lang/crates.io-index"
--checksum = "1f9f08af0c877571712e2e3e686ad79efad9657dbf0f7c3c8ba943ff6c38932d"
--dependencies = [
-- "cfg-if 1.0.0",
-- "num-traits",
--]
--
--[[package]]
- name = "parking_lot"
- version = "0.12.1"
- source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
- dependencies = [
- "lock_api",
- "parking_lot_core",
- ]
-diff --git a/Cargo.toml b/Cargo.toml
---- Cargo.toml
-+++ Cargo.toml
-@@ -188,16 +188,17 @@ moz_asserts = { path = "mozglue/static/r
- rure = { path = "third_party/rust/rure" }
-
- # To-be-published changes.
- cssparser = { git = "https://github.com/servo/rust-cssparser", rev = "aaa966d9d6ae70c4b8a62bb5e3a14c068bb7dff0" }
- cssparser-macros = { git = "https://github.com/servo/rust-cssparser", rev = "aaa966d9d6ae70c4b8a62bb5e3a14c068bb7dff0" }
- unicode-bidi = { git = "https://github.com/servo/unicode-bidi", rev = "ca612daf1c08c53abe07327cb3e6ef6e0a760f0c" }
-
- # Other overrides
-+any_all_workaround = { git = "https://github.com/hsivonen/any_all_workaround", rev = "7fb1b7034c9f172aade21ee1c8554e8d8a48af80" }
- chardetng = { git = "https://github.com/hsivonen/chardetng", rev = "3484d3e3ebdc8931493aa5df4d7ee9360a90e76b" }
- chardetng_c = { git = "https://github.com/hsivonen/chardetng_c", rev = "ed8a4c6f900a90d4dbc1d64b856e61490a1c3570" }
- coremidi = { git = "https://github.com/chris-zen/coremidi.git", rev = "fc68464b5445caf111e41f643a2e69ccce0b4f83" }
- cose = { git = "https://github.com/franziskuskiefer/cose-rust", rev = "43c22248d136c8b38fe42ea709d08da6355cf04b" }
- firefox-on-glean = { path = "toolkit/components/glean/api" }
- icu_capi = { path = "intl/icu_capi" }
- icu_segmenter_data = { path = "intl/icu_segmenter_data" }
- libudev-sys = { path = "dom/webauthn/libudev-sys" }
-diff --git a/config/makefiles/rust.mk b/config/makefiles/rust.mk
---- config/makefiles/rust.mk
-+++ config/makefiles/rust.mk
-@@ -260,17 +260,17 @@ export COREAUDIO_SDK_PATH=$(IPHONEOS_SDK
- export IPHONEOS_SDK_DIR
- PATH := $(topsrcdir)/build/macosx:$(PATH)
- endif
- endif
-
- ifndef RUSTC_BOOTSTRAP
- RUSTC_BOOTSTRAP := mozglue_static,qcms
- ifdef MOZ_RUST_SIMD
--RUSTC_BOOTSTRAP := $(RUSTC_BOOTSTRAP),encoding_rs,packed_simd
-+RUSTC_BOOTSTRAP := $(RUSTC_BOOTSTRAP),encoding_rs,any_all_workaround
- endif
- export RUSTC_BOOTSTRAP
- endif
-
- target_rust_ltoable := force-cargo-library-build $(ADD_RUST_LTOABLE)
- target_rust_nonltoable := force-cargo-test-run force-cargo-program-build
-
- ifdef MOZ_PGO_RUST
-diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml
---- supply-chain/audits.toml
-+++ supply-chain/audits.toml
-@@ -596,16 +596,29 @@ who = "Mike Hommey <mh+mozilla@glandium.
- criteria = "safe-to-deploy"
- delta = "0.1.2 -> 0.1.4"
-
- [[audits.android_system_properties]]
- who = "Mike Hommey <mh+mozilla@glandium.org>"
- criteria = "safe-to-deploy"
- delta = "0.1.4 -> 0.1.5"
-
-+[[audits.any_all_workaround]]
-+who = "Henri Sivonen <hsivonen@hsivonen.fi>"
-+criteria = "safe-to-deploy"
-+version = "0.1.0"
-+notes = "The little code that is in this crate I reviewed and modified from packed_simd (which has previously been vendored in full instead of just this small part)."
-+
-+[[audits.any_all_workaround]]
-+who = "Henri Sivonen <hsivonen@hsivonen.fi>"
-+criteria = "safe-to-deploy"
-+delta = "0.1.0 -> 0.1.0@git:7fb1b7034c9f172aade21ee1c8554e8d8a48af80"
-+importable = false
-+notes = "This is a trivial workaround copied from elsewhere in m-c, specifically qcms."
-+
- [[audits.anyhow]]
- who = "Mike Hommey <mh+mozilla@glandium.org>"
- criteria = "safe-to-deploy"
- delta = "1.0.57 -> 1.0.61"
-
- [[audits.anyhow]]
- who = "Bobby Holley <bobbyholley@gmail.com>"
- criteria = "safe-to-deploy"
-diff --git a/supply-chain/config.toml b/supply-chain/config.toml
---- supply-chain/config.toml
-+++ supply-chain/config.toml
-@@ -14,16 +14,20 @@ url = "https://raw.githubusercontent.com
- url = "https://raw.githubusercontent.com/google/supply-chain/main/audits.toml"
-
- [imports.isrg]
- url = "https://raw.githubusercontent.com/divviup/libprio-rs/main/supply-chain/audits.toml"
-
- [imports.mozilla]
- url = "https://raw.githubusercontent.com/mozilla/supply-chain/main/audits.toml"
-
-+[policy.any_all_workaround]
-+audit-as-crates-io = true
-+notes = "This is the upstream code plus the ARM intrinsics workaround from qcms, see bug 1882209."
-+
- [policy.autocfg]
- audit-as-crates-io = true
- notes = "This is the upstream code plus a few local fixes, see bug 1685697."
-
- [policy.chardetng]
- audit-as-crates-io = true
- notes = "This is a crate Henri wrote which is also published. We should probably update Firefox to tip and certify that."
-
-diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock
---- supply-chain/imports.lock
-+++ supply-chain/imports.lock
-@@ -192,16 +192,23 @@ user-name = "David Tolnay"
-
- [[publisher.encoding_rs]]
- version = "0.8.33"
- when = "2023-08-23"
- user-id = 4484
- user-login = "hsivonen"
- user-name = "Henri Sivonen"
-
-+[[publisher.encoding_rs]]
-+version = "0.8.34"
-+when = "2024-04-10"
-+user-id = 4484
-+user-login = "hsivonen"
-+user-name = "Henri Sivonen"
-+
- [[publisher.errno]]
- version = "0.3.8"
- when = "2023-11-28"
- user-id = 6825
- user-login = "sunfishcode"
- user-name = "Dan Gohman"
-
- [[publisher.etagere]]
-diff --git a/third_party/rust/any_all_workaround/.cargo-checksum.json b/third_party/rust/any_all_workaround/.cargo-checksum.json
-new file mode 100644
---- /dev/null
-+++ third_party/rust/any_all_workaround/.cargo-checksum.json
-@@ -0,0 +1,1 @@
-+{"files":{"Cargo.toml":"f8c127449dc9432d404c21c99833e4617ab88a797445af249a7fe3c989985d6d","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","LICENSE-MIT-QCMS":"36d847ae882f6574ebc72f56a4f354e4f104fde4a584373496482e97d52d31bc","README.md":"4c617b8ced3a27b7edecf0e5e41ed451c04e88dab529e7a35fccc4e1551efbd7","build.rs":"56b29ab6da3e49075bfd0a7b690267c8016298bf0d332e2e68bbaf19decbbf71","src/lib.rs":"7118106690b9d25c5d0a3e2079feb83d76f1d434d0da36b9d0351806d27c850d"},"package":null}
-\ No newline at end of file
-diff --git a/third_party/rust/any_all_workaround/Cargo.toml b/third_party/rust/any_all_workaround/Cargo.toml
-new file mode 100644
---- /dev/null
-+++ third_party/rust/any_all_workaround/Cargo.toml
-@@ -0,0 +1,28 @@
-+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
-+#
-+# When uploading crates to the registry Cargo will automatically
-+# "normalize" Cargo.toml files for maximal compatibility
-+# with all versions of Cargo and also rewrite `path` dependencies
-+# to registry (e.g., crates.io) dependencies.
-+#
-+# If you are reading this file be aware that the original Cargo.toml
-+# will likely look very different (and much more reasonable).
-+# See Cargo.toml.orig for the original contents.
-+
-+[package]
-+edition = "2021"
-+name = "any_all_workaround"
-+version = "0.1.0"
-+authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
-+description = "Workaround for bad LLVM codegen for boolean reductions on 32-bit ARM"
-+homepage = "https://docs.rs/any_all_workaround/"
-+documentation = "https://docs.rs/any_all_workaround/"
-+readme = "README.md"
-+license = "MIT OR Apache-2.0"
-+repository = "https://github.com/hsivonen/any_all_workaround"
-+
-+[dependencies]
-+cfg-if = "1.0"
-+
-+[build-dependencies]
-+version_check = "0.9"
-diff --git a/third_party/rust/packed_simd/LICENSE-APACHE b/third_party/rust/any_all_workaround/LICENSE-APACHE
-rename from third_party/rust/packed_simd/LICENSE-APACHE
-rename to third_party/rust/any_all_workaround/LICENSE-APACHE
-diff --git a/third_party/rust/packed_simd/LICENSE-MIT b/third_party/rust/any_all_workaround/LICENSE-MIT
-rename from third_party/rust/packed_simd/LICENSE-MIT
-rename to third_party/rust/any_all_workaround/LICENSE-MIT
-diff --git a/third_party/rust/any_all_workaround/LICENSE-MIT-QCMS b/third_party/rust/any_all_workaround/LICENSE-MIT-QCMS
-new file mode 100644
---- /dev/null
-+++ third_party/rust/any_all_workaround/LICENSE-MIT-QCMS
-@@ -0,0 +1,21 @@
-+qcms
-+Copyright (C) 2009-2024 Mozilla Corporation
-+Copyright (C) 1998-2007 Marti Maria
-+
-+Permission is hereby granted, free of charge, to any person obtaining
-+a copy of this software and associated documentation files (the "Software"),
-+to deal in the Software without restriction, including without limitation
-+the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+and/or sell copies of the Software, and to permit persons to whom the Software
-+is furnished to do so, subject to the following conditions:
-+
-+The above copyright notice and this permission notice shall be included in
-+all copies or substantial portions of the Software.
-+
-+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
-+THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-diff --git a/third_party/rust/any_all_workaround/README.md b/third_party/rust/any_all_workaround/README.md
-new file mode 100644
---- /dev/null
-+++ third_party/rust/any_all_workaround/README.md
-@@ -0,0 +1,13 @@
-+# any_all_workaround
-+
-+This is a workaround for bad codegen ([Rust bug](https://github.com/rust-lang/portable-simd/issues/146), [LLVM bug](https://github.com/llvm/llvm-project/issues/50466)) for the `any()` and `all()` reductions for NEON-backed SIMD vectors on 32-bit ARM. On other platforms these delegate to `any()` and `all()` in `core::simd`.
-+
-+The plan is to abandon this crate once the LLVM bug is fixed or `core::simd` works around the LLVM bug.
-+
-+The code is forked from the [`packed_simd` crate](https://raw.githubusercontent.com/hsivonen/packed_simd/d938e39bee9bc5c222f5f2f2a0df9e53b5ce36ae/src/codegen/reductions/mask/arm.rs).
-+
-+This crate requires Nightly Rust as it depends on the `portable_simd` feature.
-+
-+# License
-+
-+`MIT OR Apache-2.0`, since that's how `packed_simd` is licensed. (The ARM intrinsics Rust version workaround is from qcms, see LICENSE-MIT-QCMS.)
-diff --git a/third_party/rust/any_all_workaround/build.rs b/third_party/rust/any_all_workaround/build.rs
-new file mode 100644
---- /dev/null
-+++ third_party/rust/any_all_workaround/build.rs
-@@ -0,0 +1,7 @@
-+extern crate version_check as rustc;
-+
-+fn main() {
-+ if rustc::is_min_version("1.78.0").unwrap_or(false) {
-+ println!("cargo:rustc-cfg=stdsimd_split");
-+ }
-+}
-diff --git a/third_party/rust/any_all_workaround/src/lib.rs b/third_party/rust/any_all_workaround/src/lib.rs
-new file mode 100644
---- /dev/null
-+++ third_party/rust/any_all_workaround/src/lib.rs
-@@ -0,0 +1,110 @@
-+// This code began as a fork of
-+// https://raw.githubusercontent.com/rust-lang/packed_simd/d938e39bee9bc5c222f5f2f2a0df9e53b5ce36ae/src/codegen/reductions/mask/arm.rs
-+// which didn't have a license header on the file, but Cargo.toml said "MIT OR Apache-2.0".
-+// See LICENSE-MIT and LICENSE-APACHE.
-+
-+#![no_std]
-+#![feature(portable_simd)]
-+#![cfg_attr(
-+ all(
-+ stdsimd_split,
-+ target_arch = "arm",
-+ target_endian = "little",
-+ target_feature = "neon",
-+ target_feature = "v7"
-+ ),
-+ feature(stdarch_arm_neon_intrinsics)
-+)]
-+#![cfg_attr(
-+ all(
-+ not(stdsimd_split),
-+ target_arch = "arm",
-+ target_endian = "little",
-+ target_feature = "neon",
-+ target_feature = "v7"
-+ ),
-+ feature(stdsimd)
-+)]
-+
-+use cfg_if::cfg_if;
-+use core::simd::mask16x8;
-+use core::simd::mask32x4;
-+use core::simd::mask8x16;
-+
-+cfg_if! {
-+ if #[cfg(all(target_arch = "arm", target_endian = "little", target_feature = "neon", target_feature = "v7"))] {
-+ use core::simd::mask8x8;
-+ use core::simd::mask16x4;
-+ use core::simd::mask32x2;
-+ macro_rules! arm_128_v7_neon_impl {
-+ ($all:ident, $any:ident, $id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
-+ #[inline]
-+ pub fn $all(s: $id) -> bool {
-+ use core::arch::arm::$vpmin;
-+ use core::mem::transmute;
-+ unsafe {
-+ union U {
-+ halves: ($half, $half),
-+ vec: $id,
-+ }
-+ let halves = U { vec: s }.halves;
-+ let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
-+ h.all()
-+ }
-+ }
-+ #[inline]
-+ pub fn $any(s: $id) -> bool {
-+ use core::arch::arm::$vpmax;
-+ use core::mem::transmute;
-+ unsafe {
-+ union U {
-+ halves: ($half, $half),
-+ vec: $id,
-+ }
-+ let halves = U { vec: s }.halves;
-+ let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
-+ h.any()
-+ }
-+ }
-+ }
-+ }
-+ } else {
-+ macro_rules! arm_128_v7_neon_impl {
-+ ($all:ident, $any:ident, $id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
-+ #[inline(always)]
-+ pub fn $all(s: $id) -> bool {
-+ s.all()
-+ }
-+ #[inline(always)]
-+ pub fn $any(s: $id) -> bool {
-+ s.any()
-+ }
-+ }
-+ }
-+ }
-+}
-+
-+arm_128_v7_neon_impl!(
-+ all_mask8x16,
-+ any_mask8x16,
-+ mask8x16,
-+ mask8x8,
-+ vpmin_u8,
-+ vpmax_u8
-+);
-+arm_128_v7_neon_impl!(
-+ all_mask16x8,
-+ any_mask16x8,
-+ mask16x8,
-+ mask16x4,
-+ vpmin_u16,
-+ vpmax_u16
-+);
-+arm_128_v7_neon_impl!(
-+ all_mask32x4,
-+ any_mask32x4,
-+ mask32x4,
-+ mask32x2,
-+ vpmin_u32,
-+ vpmax_u32
-+);
-diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json
---- third_party/rust/encoding_rs/.cargo-checksum.json
-+++ third_party/rust/encoding_rs/.cargo-checksum.json
-@@ -1,1 +1,1 @@
--{"files":{"CONTRIBUTING.md":"ca1901f3e8532fb4cec894fd3664f0eaa898c0c4b961d1b992d1ed54eacf362a","COPYRIGHT":"11789f45bb180841cd362a5eee6789c68ddb573a11105e30768c308a6add0190","Cargo.toml":"42fa83322aa9fd6723b77d35d0cacb92cbb6e7f573ce11c55f5225292866f8f4","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"3fa4ca83dcc9237839b1bdeb2e6d16bdfb5ec0c5ce42b24694d8bbf0dcbef72c","LICENSE-WHATWG":"838118388fe5c2e7f1dbbaeed13e1c7f3ebf88be91319c7c1d77c18e987d1a50","README.md":"d938e8ab0b9ab67e74a1a4f48f23fdce956d0ad3a3f6147ae7612a92763c88d5","ci/miri.sh":"43cb8d82f49e3bfe2d2274b6ccd6f0714a4188ccef0cecc040829883cfdbee25","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da727
5f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":
"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"be989dd25c6b946e3e8745fdc8e8a80fcf24b3be99ad0b4b78153ba3f6ab6310","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"c44c002641adb5ebc4368707a8cc0a076d2f33e6a5c27b1b69988eb515f5653d","src/big5.rs":"ec6e2913011a38e9a3e825a1731f139a7ca1d5b264fefae51a3cc1a68a57cef9","src/data.rs":"8a617cc57032092d65850eb27e00de687c80aea3299e839a1f58b42d0b35abf3","src/euc_jp.rs":"32047f5b540188c4cb19c07165f846b9786a09f18e315ed3e9bda1293dae52aa","src/euc_kr.rs":"9b25afc72d9378700eecfac58d55ad1c5946d6cd0ccde2c29c08200ef2de6bb9","src/gb18030.rs":"808587168d73f0c80f8520f0ca9b161866ed2efeb17a05e85fdf3b8efe7ba28a","src/handles.rs":"cc83dc0754751d67f5688a65c5e0191cba02f6bacce81a0813a243cba55eef7a","src/iso_2022_jp.rs":"9bb485e82574f4b7d4b2364f0ff276acb6a0bc111758420a3b0ec5e04c196652","src/lib.rs":"1dc07b818e45846b16ddcaf0de46c8862dd7df8099123ec38b95c3f8ad9c91ec","src/macros.rs":"200997f8870de8bfd8cdc475e92115df42108c0df661e49d3d1cbc32056e1d99","src/mem.rs":"0bf34103e0ad1b842a13a082dee2b920b05cf4fb0f145c9ee7f608f4cb4a544f","src/replacement.rs":"7660b34a53f8c1ca2bdfa0e51e843ec28326950952ad8bc96569feb93ac62308","src/shift_jis.rs":"6951ae67e36b1a12fa3a30734957f444d8b1b4ae0e2bde52060b29bd0f16d9d9","src/simd_funcs.rs":"2612aba86e1d201096d7e47a859bc3444f85934cc82d8adc6d39a4304d9eecfc","src/single_byte.rs":"3c9e9c1f946ae622c725ba9421240c1faa9a05e95fa10dd4642a25cb276a1edc","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_
ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src
/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"23a2e11b02b3b8d15fb5613a625e3edb2c61e70e3c581abfd638719a4088200d","src/testing.rs":"f59e671e95a98a56f6b573e8c6be4d71e670bf52f7e20eb1605d990aafa1894e","src/utf_16.rs":"c071a147fad38d750c2c247e141b76b929a48007b99f26b2922b9caecdaf2f25","src/utf_8.rs":"7b7d887b347f1aefa03246b028a36a72758a4ce76c28f3b45c19467851aa7839","src/variant.rs":"1fab5363588a1554a7169de8731ea9cded7ac63ea35caabdd1c27a8dde68c27b","src/x_user_defined.rs":"c9c010730dfb9f141d4fed19350c08a21af240913a54bb64f5ca89ff93b6b7d1"},"package":"7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"}
-\ No newline at end of file
-+{"files":{"CONTRIBUTING.md":"ca1901f3e8532fb4cec894fd3664f0eaa898c0c4b961d1b992d1ed54eacf362a","COPYRIGHT":"11789f45bb180841cd362a5eee6789c68ddb573a11105e30768c308a6add0190","Cargo.toml":"22a4d210c92dae9f32c6944ef340ee8fdd027f99c081577e8907123e2a93383e","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"3fa4ca83dcc9237839b1bdeb2e6d16bdfb5ec0c5ce42b24694d8bbf0dcbef72c","LICENSE-WHATWG":"838118388fe5c2e7f1dbbaeed13e1c7f3ebf88be91319c7c1d77c18e987d1a50","README.md":"1d08aefcb92afa81b18154049c9abbcad4540a23f7172e9f9bbed5af33f1a087","ci/miri.sh":"43cb8d82f49e3bfe2d2274b6ccd6f0714a4188ccef0cecc040829883cfdbee25","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da727
5f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":
"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"be989dd25c6b946e3e8745fdc8e8a80fcf24b3be99ad0b4b78153ba3f6ab6310","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"588e38b01e666d5e7462617ea7e90a108d608dec9e016f3d273ac0744af2e05d","src/big5.rs":"ec6e2913011a38e9a3e825a1731f139a7ca1d5b264fefae51a3cc1a68a57cef9","src/data.rs":"8a617cc57032092d65850eb27e00de687c80aea3299e839a1f58b42d0b35abf3","src/euc_jp.rs":"32047f5b540188c4cb19c07165f846b9786a09f18e315ed3e9bda1293dae52aa","src/euc_kr.rs":"9b25afc72d9378700eecfac58d55ad1c5946d6cd0ccde2c29c08200ef2de6bb9","src/gb18030.rs":"808587168d73f0c80f8520f0ca9b161866ed2efeb17a05e85fdf3b8efe7ba28a","src/handles.rs":"b08cef1f5785bb6a4822f2e844c6df1b046b737b7a075e4593eaa8c4208e9fe2","src/iso_2022_jp.rs":"9bb485e82574f4b7d4b2364f0ff276acb6a0bc111758420a3b0ec5e04c196652","src/lib.rs":"834f44b670ec48ee82c0e12223d1567313fdd9f88bca5f4b117c82c1828f559f","src/macros.rs":"200997f8870de8bfd8cdc475e92115df42108c0df661e49d3d1cbc32056e1d99","src/mem.rs":"948571137d3b151df8db4fb2c733e74ae595d055cdf0ad83abcab9341d6adabe","src/replacement.rs":"7660b34a53f8c1ca2bdfa0e51e843ec28326950952ad8bc96569feb93ac62308","src/shift_jis.rs":"6951ae67e36b1a12fa3a30734957f444d8b1b4ae0e2bde52060b29bd0f16d9d9","src/simd_funcs.rs":"05c6e77af74bfe73cd39a752067c11425d6b46e5da419910f54bf75a5c02a984","src/single_byte.rs":"3ad87116fb339434a4b58e8f2b15485f2b66b9f7814d708f16194ed08f6d6ccf","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_
ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src
/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"23a2e11b02b3b8d15fb5613a625e3edb2c61e70e3c581abfd638719a4088200d","src/testing.rs":"f59e671e95a98a56f6b573e8c6be4d71e670bf52f7e20eb1605d990aafa1894e","src/utf_16.rs":"c071a147fad38d750c2c247e141b76b929a48007b99f26b2922b9caecdaf2f25","src/utf_8.rs":"7b7d887b347f1aefa03246b028a36a72758a4ce76c28f3b45c19467851aa7839","src/variant.rs":"1fab5363588a1554a7169de8731ea9cded7ac63ea35caabdd1c27a8dde68c27b","src/x_user_defined.rs":"9456ca46168ef86c98399a2536f577ef7be3cdde90c0c51392d8ac48519d3fae"},"package":"b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"}
-\ No newline at end of file
-diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml
---- third_party/rust/encoding_rs/Cargo.toml
-+++ third_party/rust/encoding_rs/Cargo.toml
-@@ -6,18 +6,19 @@
- # to registry (e.g., crates.io) dependencies.
- #
- # If you are reading this file be aware that the original Cargo.toml
- # will likely look very different (and much more reasonable).
- # See Cargo.toml.orig for the original contents.
-
- [package]
- edition = "2018"
-+rust-version = "1.36"
- name = "encoding_rs"
--version = "0.8.33"
-+version = "0.8.34"
- authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
- description = "A Gecko-oriented implementation of the Encoding Standard"
- homepage = "https://docs.rs/encoding_rs/"
- documentation = "https://docs.rs/encoding_rs/"
- readme = "README.md"
- keywords = [
- "encoding",
- "web",
-@@ -31,23 +32,23 @@ categories = [
- "internationalization",
- ]
- license = "(Apache-2.0 OR MIT) AND BSD-3-Clause"
- repository = "https://github.com/hsivonen/encoding_rs"
-
- [profile.release]
- lto = true
-
-+[dependencies.any_all_workaround]
-+version = "0.1.0"
-+optional = true
-+
- [dependencies.cfg-if]
- version = "1.0"
-
--[dependencies.packed_simd]
--version = "0.3.9"
--optional = true
--
- [dependencies.serde]
- version = "1.0"
- optional = true
-
- [dev-dependencies.bincode]
- version = "1.0"
-
- [dev-dependencies.serde_derive]
-@@ -69,15 +70,9 @@ fast-legacy-encode = [
- "fast-hanja-encode",
- "fast-kanji-encode",
- "fast-gb-hanzi-encode",
- "fast-big5-hanzi-encode",
- ]
- less-slow-big5-hanzi-encode = []
- less-slow-gb-hanzi-encode = []
- less-slow-kanji-encode = []
--simd-accel = [
-- "packed_simd",
-- "packed_simd/into_bits",
--]
--
--[badges.travis-ci]
--repository = "hsivonen/encoding_rs"
-+simd-accel = ["any_all_workaround"]
-diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md
---- third_party/rust/encoding_rs/README.md
-+++ third_party/rust/encoding_rs/README.md
-@@ -162,50 +162,36 @@ wrappers.
- * [C++](https://github.com/hsivonen/recode_cpp)
-
- ## Optional features
-
- There are currently these optional cargo features:
-
- ### `simd-accel`
-
--Enables SIMD acceleration using the nightly-dependent `packed_simd` crate.
-+Enables SIMD acceleration using the nightly-dependent `portable_simd` standard
-+library feature.
-
- This is an opt-in feature, because enabling this feature _opts out_ of Rust's
- guarantees of future compilers compiling old code (aka. "stability story").
-
- Currently, this has not been tested to be an improvement except for these
--targets:
-+targets and enabling the `simd-accel` feature is expected to break the build
-+on other targets:
-
- * x86_64
- * i686
- * aarch64
- * thumbv7neon
-
- If you use nightly Rust, you use targets whose first component is one of the
- above, and you are prepared _to have to revise your configuration when updating
- Rust_, you should enable this feature. Otherwise, please _do not_ enable this
- feature.
-
--_Note!_ If you are compiling for a target that does not have 128-bit SIMD
--enabled as part of the target definition and you are enabling 128-bit SIMD
--using `-C target_feature`, you need to enable the `core_arch` Cargo feature
--for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of
--using the standard-library copy of `core::arch`, because the `core::arch`
--module of the pre-compiled standard library has been compiled with the
--assumption that the CPU doesn't have 128-bit SIMD. At present this applies
--mainly to 32-bit ARM targets whose first component does not include the
--substring `neon`.
--
--The encoding_rs side of things has not been properly set up for POWER,
--PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow
--the advice from the previous paragraph, you probably shouldn't use
--the `simd-accel` option on the less mainstream architectures at this
--time.
--
- Used by Firefox.
-
- ### `serde`
-
- Enables support for serializing and deserializing `&'static Encoding`-typed
- struct fields using [Serde][1].
-
- [1]: https://serde.rs/
-@@ -376,18 +362,19 @@ It is a goal to support the latest stabl
- the version of Rust that's used for Firefox Nightly.
-
- At this time, there is no firm commitment to support a version older than
- what's required by Firefox, and there is no commitment to treat MSRV changes
- as semver-breaking, because this crate depends on `cfg-if`, which doesn't
- appear to treat MSRV changes as semver-breaking, so it would be useless for
- this crate to treat MSRV changes as semver-breaking.
-
--As of 2021-02-04, MSRV appears to be Rust 1.36.0 for using the crate and
-+As of 2024-04-04, MSRV appears to be Rust 1.36.0 for using the crate and
- 1.42.0 for doc tests to pass without errors about the global allocator.
-+With the `simd-accel` feature, the MSRV is even higher.
-
- ## Compatibility with rust-encoding
-
- A compatibility layer that implements the rust-encoding API on top of
- encoding_rs is
- [provided as a separate crate](https://github.com/hsivonen/encoding_rs_compat)
- (cannot be uploaded to crates.io). The compatibility layer was originally
- written with the assuption that Firefox would need it, but it is not currently
-@@ -441,20 +428,27 @@ To regenerate the generated code:
- - [x] Implement the rust-encoding API in terms of encoding_rs.
- - [x] Add SIMD acceleration for Aarch64.
- - [x] Investigate the use of NEON on 32-bit ARM.
- - [ ] ~Investigate Björn Höhrmann's lookup table acceleration for UTF-8 as
- adapted to Rust in rust-encoding.~
- - [x] Add actually fast CJK encode options.
- - [ ] ~Investigate [Bob Steagall's lookup table acceleration for UTF-8](https://github.com/BobSteagall/CppNow2018/blob/master/FastConversionFromUTF-8/Fast%20Conversion%20From%20UTF-8%20with%20C%2B%2B%2C%20DFAs%2C%20and%20SSE%20Intrinsics%20-%20Bob%20Steagall%20-%20C%2B%2BNow%202018.pdf).~
- - [x] Provide a build mode that works without `alloc` (with lesser API surface).
--- [ ] Migrate to `std::simd` once it is stable and declare 1.0.
-+- [x] Migrate to `std::simd` ~once it is stable and declare 1.0.~
-+- [ ] Migrate `unsafe` slice access by larger types than `u8`/`u16` to `align_to`.
-
- ## Release Notes
-
-+### 0.8.34
-+
-+* Use the `portable_simd` nightly feature of the standard library instead of the `packed_simd` crate. Only affects the `simd-accel` optional nightly feature.
-+* Internal documentation improvements and minor code improvements around `unsafe`.
-+* Added `rust-version` to `Cargo.toml`.
-+
- ### 0.8.33
-
- * Use `packed_simd` instead of `packed_simd_2` again now that updates are back under the `packed_simd` name. Only affects the `simd-accel` optional nightly feature.
-
- ### 0.8.32
-
- * Removed `build.rs`. (This removal should resolve false positives reported by some antivirus products. This may break some build configurations that have opted out of Rust's guarantees against future build breakage.)
- * Internal change to what API is used for reinterpreting the lane configuration of SIMD vectors.
-diff --git a/third_party/rust/encoding_rs/src/ascii.rs b/third_party/rust/encoding_rs/src/ascii.rs
---- third_party/rust/encoding_rs/src/ascii.rs
-+++ third_party/rust/encoding_rs/src/ascii.rs
-@@ -46,71 +46,87 @@ cfg_if! {
- #[allow(dead_code)]
- #[inline(always)]
- fn likely(b: bool) -> bool {
- b
- }
- }
- }
-
-+// Safety invariants for masks: data & mask = 0 for valid ASCII or basic latin utf-16
-+
- // `as` truncates, so works on 32-bit, too.
- #[allow(dead_code)]
- pub const ASCII_MASK: usize = 0x8080_8080_8080_8080u64 as usize;
-
- // `as` truncates, so works on 32-bit, too.
- #[allow(dead_code)]
- pub const BASIC_LATIN_MASK: usize = 0xFF80_FF80_FF80_FF80u64 as usize;
-
- #[allow(unused_macros)]
- macro_rules! ascii_naive {
- ($name:ident, $src_unit:ty, $dst_unit:ty) => {
-+ /// Safety: src and dst must have len_unit elements and be aligned
-+ /// Safety-usable invariant: will return Some() when it fails
-+ /// to convert. The first value will be a u8 that is > 127.
- #[inline(always)]
- pub unsafe fn $name(
- src: *const $src_unit,
- dst: *mut $dst_unit,
- len: usize,
- ) -> Option<($src_unit, usize)> {
- // Yes, manually omitting the bound check here matters
- // a lot for perf.
- for i in 0..len {
-+ // Safety: len invariant used here
- let code_unit = *(src.add(i));
-+ // Safety: Upholds safety-usable invariant here
- if code_unit > 127 {
- return Some((code_unit, i));
- }
-+ // Safety: len invariant used here
- *(dst.add(i)) = code_unit as $dst_unit;
- }
- return None;
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_alu {
- ($name:ident,
-+ // safety invariant: src/dst MUST be u8
- $src_unit:ty,
- $dst_unit:ty,
-+ // Safety invariant: stride_fn must consume and produce two usizes, and return the index of the first non-ascii when it fails
- $stride_fn:ident) => {
-+ /// Safety: src and dst must have len elements, src is valid for read, dst is valid for
-+ /// write
-+ /// Safety-usable invariant: will return Some() when it fails
-+ /// to convert. The first value will be a u8 that is > 127.
- #[cfg_attr(feature = "cargo-clippy", allow(never_loop, cast_ptr_alignment))]
- #[inline(always)]
- pub unsafe fn $name(
- src: *const $src_unit,
- dst: *mut $dst_unit,
- len: usize,
- ) -> Option<($src_unit, usize)> {
- let mut offset = 0usize;
- // This loop is only broken out of as a `goto` forward
- loop {
-+ // Safety: until_alignment becomes the number of bytes we need to munch until we are aligned to usize
- let mut until_alignment = {
- // Check if the other unit aligns if we move the narrower unit
- // to alignment.
- // if ::core::mem::size_of::<$src_unit>() == ::core::mem::size_of::<$dst_unit>() {
- // ascii_to_ascii
- let src_alignment = (src as usize) & ALU_ALIGNMENT_MASK;
- let dst_alignment = (dst as usize) & ALU_ALIGNMENT_MASK;
- if src_alignment != dst_alignment {
-+ // Safety: bails early and ends up in the naïve branch where usize-alignment doesn't matter
- break;
- }
- (ALU_ALIGNMENT - src_alignment) & ALU_ALIGNMENT_MASK
- // } else if ::core::mem::size_of::<$src_unit>() < ::core::mem::size_of::<$dst_unit>() {
- // ascii_to_basic_latin
- // let src_until_alignment = (ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK;
- // if (dst.add(src_until_alignment) as usize) & ALIGNMENT_MASK != 0 {
- // break;
-@@ -129,74 +145,104 @@ macro_rules! ascii_alu {
- // Moving pointers to alignment seems to be a pessimization on
- // x86_64 for operations that have UTF-16 as the internal
- // Unicode representation. However, since it seems to be a win
- // on ARM (tested ARMv7 code running on ARMv8 [rpi3]), except
- // mixed results when encoding from UTF-16 and since x86 and
- // x86_64 should be using SSE2 in due course, keeping the move
- // to alignment here. It would be good to test on more ARM CPUs
- // and on real MIPS and POWER hardware.
-+ //
-+ // Safety: This is the naïve code once again, for `until_alignment` bytes
- while until_alignment != 0 {
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety: Upholds safety-usable invariant here
- return Some((code_unit, offset));
- }
- *(dst.add(offset)) = code_unit as $dst_unit;
-+ // Safety: offset is the number of bytes copied so far
- offset += 1;
- until_alignment -= 1;
- }
- let len_minus_stride = len - ALU_STRIDE_SIZE;
- loop {
-+ // Safety: num_ascii is known to be a byte index of a non-ascii byte due to stride_fn's invariant
- if let Some(num_ascii) = $stride_fn(
-+ // Safety: These are known to be valid and aligned since we have at
-+ // least ALU_STRIDE_SIZE data in these buffers, and offset is the
-+ // number of elements copied so far, which according to the
-+ // until_alignment calculation above will cause both src and dst to be
-+ // aligned to usize after this add
- src.add(offset) as *const usize,
- dst.add(offset) as *mut usize,
- ) {
- offset += num_ascii;
-+ // Safety: Upholds safety-usable invariant here by indexing into non-ascii byte
- return Some((*(src.add(offset)), offset));
- }
-+ // Safety: offset continues to be the number of bytes copied so far, and
-+ // maintains usize alignment for the next loop iteration
- offset += ALU_STRIDE_SIZE;
-+ // Safety: This is `offset > len - stride. This loop will continue as long as
-+ // `offset <= len - stride`, which means there are `stride` bytes to still be read.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- break;
- }
-+
-+ // Safety: This is the naïve code, same as ascii_naive, and has no requirements
-+ // other than src/dst being valid for the the right lens
- while offset < len {
-+ // Safety: len invariant used here
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety: Upholds safety-usable invariant here
- return Some((code_unit, offset));
- }
-+ // Safety: len invariant used here
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! basic_latin_alu {
- ($name:ident,
-+ // safety invariant: use u8 for src/dest for ascii, and u16 for basic_latin
- $src_unit:ty,
- $dst_unit:ty,
-+ // safety invariant: stride function must munch ALU_STRIDE_SIZE*size(src_unit) bytes off of src and
-+ // write ALU_STRIDE_SIZE*size(dst_unit) bytes to dst
- $stride_fn:ident) => {
-+ /// Safety: src and dst must have len elements, src is valid for read, dst is valid for
-+ /// write
-+ /// Safety-usable invariant: will return Some() when it fails
-+ /// to convert. The first value will be a u8 that is > 127.
- #[cfg_attr(
- feature = "cargo-clippy",
- allow(never_loop, cast_ptr_alignment, cast_lossless)
- )]
- #[inline(always)]
- pub unsafe fn $name(
- src: *const $src_unit,
- dst: *mut $dst_unit,
- len: usize,
- ) -> Option<($src_unit, usize)> {
- let mut offset = 0usize;
- // This loop is only broken out of as a `goto` forward
- loop {
-+ // Safety: until_alignment becomes the number of bytes we need to munch from src/dest until we are aligned to usize
-+ // We ensure basic-latin has the same alignment as ascii, starting with ascii since it is smaller.
- let mut until_alignment = {
- // Check if the other unit aligns if we move the narrower unit
- // to alignment.
- // if ::core::mem::size_of::<$src_unit>() == ::core::mem::size_of::<$dst_unit>() {
- // ascii_to_ascii
- // let src_alignment = (src as usize) & ALIGNMENT_MASK;
- // let dst_alignment = (dst as usize) & ALIGNMENT_MASK;
- // if src_alignment != dst_alignment {
-@@ -232,66 +278,89 @@ macro_rules! basic_latin_alu {
- // Moving pointers to alignment seems to be a pessimization on
- // x86_64 for operations that have UTF-16 as the internal
- // Unicode representation. However, since it seems to be a win
- // on ARM (tested ARMv7 code running on ARMv8 [rpi3]), except
- // mixed results when encoding from UTF-16 and since x86 and
- // x86_64 should be using SSE2 in due course, keeping the move
- // to alignment here. It would be good to test on more ARM CPUs
- // and on real MIPS and POWER hardware.
-+ //
-+ // Safety: This is the naïve code once again, for `until_alignment` bytes
- while until_alignment != 0 {
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety: Upholds safety-usable invariant here
- return Some((code_unit, offset));
- }
- *(dst.add(offset)) = code_unit as $dst_unit;
-+ // Safety: offset is the number of bytes copied so far
- offset += 1;
- until_alignment -= 1;
- }
- let len_minus_stride = len - ALU_STRIDE_SIZE;
- loop {
- if !$stride_fn(
-+ // Safety: These are known to be valid and aligned since we have at
-+ // least ALU_STRIDE_SIZE data in these buffers, and offset is the
-+ // number of elements copied so far, which according to the
-+ // until_alignment calculation above will cause both src and dst to be
-+ // aligned to usize after this add
- src.add(offset) as *const usize,
- dst.add(offset) as *mut usize,
- ) {
- break;
- }
-+ // Safety: offset continues to be the number of bytes copied so far, and
-+ // maintains usize alignment for the next loop iteration
- offset += ALU_STRIDE_SIZE;
-+ // Safety: This is `offset > len - stride`. This loop will continue as long as
-+ // `offset <= len - stride`, which means there are `stride` bytes to still be read.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- break;
- }
-+ // Safety: This is the naïve code once again, for leftover bytes
- while offset < len {
-+ // Safety: len invariant used here
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety: Upholds safety-usable invariant here
- return Some((code_unit, offset));
- }
-+ // Safety: len invariant used here
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! latin1_alu {
-+ // safety invariant: stride function must munch ALU_STRIDE_SIZE*size(src_unit) bytes off of src and
-+ // write ALU_STRIDE_SIZE*size(dst_unit) bytes to dst
- ($name:ident, $src_unit:ty, $dst_unit:ty, $stride_fn:ident) => {
-+ /// Safety: src and dst must have len elements, src is valid for read, dst is valid for
-+ /// write
- #[cfg_attr(
- feature = "cargo-clippy",
- allow(never_loop, cast_ptr_alignment, cast_lossless)
- )]
- #[inline(always)]
- pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
- let mut offset = 0usize;
- // This loop is only broken out of as a `goto` forward
- loop {
-+ // Safety: until_alignment becomes the number of bytes we need to munch from src/dest until we are aligned to usize
-+ // We ensure the UTF-16 side has the same alignment as the Latin-1 side, starting with Latin-1 since it is smaller.
- let mut until_alignment = {
- if ::core::mem::size_of::<$src_unit>() < ::core::mem::size_of::<$dst_unit>() {
- // unpack
- let src_until_alignment = (ALU_ALIGNMENT
- - ((src as usize) & ALU_ALIGNMENT_MASK))
- & ALU_ALIGNMENT_MASK;
- if (dst.wrapping_add(src_until_alignment) as usize) & ALU_ALIGNMENT_MASK
- != 0
-@@ -308,373 +377,485 @@ macro_rules! latin1_alu {
- != 0
- {
- break;
- }
- dst_until_alignment
- }
- };
- if until_alignment + ALU_STRIDE_SIZE <= len {
-+ // Safety: This is the naïve code once again, for `until_alignment` bytes
- while until_alignment != 0 {
- let code_unit = *(src.add(offset));
- *(dst.add(offset)) = code_unit as $dst_unit;
-+ // Safety: offset is the number of bytes copied so far
- offset += 1;
- until_alignment -= 1;
- }
- let len_minus_stride = len - ALU_STRIDE_SIZE;
- loop {
- $stride_fn(
-+ // Safety: These are known to be valid and aligned since we have at
-+ // least ALU_STRIDE_SIZE data in these buffers, and offset is the
-+ // number of elements copied so far, which according to the
-+ // until_alignment calculation above will cause both src and dst to be
-+ // aligned to usize after this add
- src.add(offset) as *const usize,
- dst.add(offset) as *mut usize,
- );
-+ // Safety: offset continues to be the number of bytes copied so far, and
-+ // maintains usize alignment for the next loop iteration
- offset += ALU_STRIDE_SIZE;
-+ // Safety: This is `offset > len - stride`. This loop will continue as long as
-+ // `offset <= len - stride`, which means there are `stride` bytes to still be read.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- break;
- }
-+ // Safety: This is the naïve code once again, for leftover bytes
- while offset < len {
-+ // Safety: len invariant used here
- let code_unit = *(src.add(offset));
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_simd_check_align {
- (
- $name:ident,
- $src_unit:ty,
- $dst_unit:ty,
-+ // Safety: This function must require aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_both_aligned:ident,
-+ // Safety: This function must require aligned/unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_src_aligned:ident,
-+ // Safety: This function must require unaligned/aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_dst_aligned:ident,
-+ // Safety: This function must require unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_neither_aligned:ident
- ) => {
-+ /// Safety: src/dst must be valid for reads/writes of `len` elements of their units.
-+ ///
-+ /// Safety-usable invariant: will return Some() when it encounters non-ASCII, with the first element in the Some being
-+ /// guaranteed to be non-ASCII (> 127), and the second being the offset where it is found
- #[inline(always)]
- pub unsafe fn $name(
- src: *const $src_unit,
- dst: *mut $dst_unit,
- len: usize,
- ) -> Option<($src_unit, usize)> {
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading/writing at least `SIMD_STRIDE_SIZE` elements.
- if SIMD_STRIDE_SIZE <= len {
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
- // XXX Should we first process one stride unconditionally as unaligned to
- // avoid the cost of the branchiness below if the first stride fails anyway?
- // XXX Should we just use unaligned SSE2 access unconditionally? It seems that
- // on Haswell, it would make sense to just use unaligned and not bother
- // checking. Need to benchmark older architectures before deciding.
- let dst_masked = (dst as usize) & SIMD_ALIGNMENT_MASK;
-+ // Safety: checking whether src is aligned
- if ((src as usize) & SIMD_ALIGNMENT_MASK) == 0 {
-+ // Safety: Checking whether dst is aligned
- if dst_masked == 0 {
- loop {
-+ // Safety: We're valid to read/write SIMD_STRIDE_SIZE elements and have the appropriate alignments
- if !$stride_both_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE` which means we always have at least `SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- } else {
- loop {
-+ // Safety: We're valid to read/write SIMD_STRIDE_SIZE elements and have the appropriate alignments
- if !$stride_src_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE` which means we always have at least `SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- } else {
- if dst_masked == 0 {
- loop {
-+ // Safety: We're valid to read/write SIMD_STRIDE_SIZE elements and have the appropriate alignments
- if !$stride_dst_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE` which means we always have at least `SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- } else {
- loop {
-+ // Safety: We're valid to read/write SIMD_STRIDE_SIZE elements and have the appropriate alignments
- if !$stride_neither_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE` which means we always have at least `SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- }
- }
- while offset < len {
-+ // Safety: uses len invariant here and below
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety: upholds safety-usable invariant
- return Some((code_unit, offset));
- }
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_simd_check_align_unrolled {
- (
- $name:ident,
- $src_unit:ty,
- $dst_unit:ty,
-+ // Safety: This function must require aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_both_aligned:ident,
-+ // Safety: This function must require aligned/unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_src_aligned:ident,
-+ // Safety: This function must require unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_neither_aligned:ident,
-+ // Safety: This function must require aligned src/dest that are valid for reading/writing 2*SIMD_STRIDE_SIZE src_unit/dst_unit
- $double_stride_both_aligned:ident,
-+ // Safety: This function must require aligned/unaligned src/dest that are valid for reading/writing 2*SIMD_STRIDE_SIZE src_unit/dst_unit
- $double_stride_src_aligned:ident
- ) => {
-- #[inline(always)]
-+ /// Safety: src/dst must be valid for reads/writes of `len` elements of their units.
-+ ///
-+ /// Safety-usable invariant: will return Some() when it encounters non-ASCII, with the first element in the Some being
-+ /// guaranteed to be non-ASCII (> 127), and the second being the offset where it is found #[inline(always)]
- pub unsafe fn $name(
- src: *const $src_unit,
- dst: *mut $dst_unit,
- len: usize,
- ) -> Option<($src_unit, usize)> {
- let unit_size = ::core::mem::size_of::<$src_unit>();
- let mut offset = 0usize;
- // This loop is only broken out of as a goto forward without
- // actually looping
- 'outer: loop {
-+ // Safety: if this check succeeds we're valid for reading/writing at least `SIMD_STRIDE_SIZE` elements.
- if SIMD_STRIDE_SIZE <= len {
- // First, process one unaligned
-+ // Safety: this is safe to call since we're valid for this read/write
- if !$stride_neither_aligned(src, dst) {
- break 'outer;
- }
- offset = SIMD_STRIDE_SIZE;
-
- // We have now seen 16 ASCII bytes. Let's guess that
- // there will be enough more to justify more expense
- // in the case of non-ASCII.
- // Use aligned reads for the sake of old microachitectures.
-+ //
-+ // Safety: this correctly calculates the number of src_units that need to be read before the remaining list is aligned.
-+ // This is less than SIMD_ALIGNMENT, which is also SIMD_STRIDE_SIZE (as documented)
- let until_alignment = ((SIMD_ALIGNMENT
- - ((src.add(offset) as usize) & SIMD_ALIGNMENT_MASK))
- & SIMD_ALIGNMENT_MASK)
- / unit_size;
-- // This addition won't overflow, because even in the 32-bit PAE case the
-+ // Safety: This addition won't overflow, because even in the 32-bit PAE case the
- // address space holds enough code that the slice length can't be that
- // close to address space size.
- // offset now equals SIMD_STRIDE_SIZE, hence times 3 below.
-+ //
-+ // Safety: if this check succeeds we're valid for reading/writing at least `2 * SIMD_STRIDE_SIZE` elements plus `until_alignment`.
-+ // The extra SIMD_STRIDE_SIZE in the condition is because `offset` is already `SIMD_STRIDE_SIZE`.
- if until_alignment + (SIMD_STRIDE_SIZE * 3) <= len {
- if until_alignment != 0 {
-+ // Safety: this is safe to call since we're valid for this read/write (and more), and don't care about alignment
-+ // This will copy over bytes that get decoded twice since it's not incrementing `offset` by SIMD_STRIDE_SIZE. This is fine.
- if !$stride_neither_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += until_alignment;
- }
-+ // Safety: At this point we're valid for reading/writing 2*SIMD_STRIDE_SIZE elements
-+ // Safety: Now `offset` is aligned for `src`
- let len_minus_stride_times_two = len - (SIMD_STRIDE_SIZE * 2);
-+ // Safety: This is whether dst is aligned
- let dst_masked = (dst.add(offset) as usize) & SIMD_ALIGNMENT_MASK;
- if dst_masked == 0 {
- loop {
-+ // Safety: both are aligned, we can call the aligned function. We're valid for reading/writing double stride from the initial condition
-+ // and the loop break condition below
- if let Some(advance) =
- $double_stride_both_aligned(src.add(offset), dst.add(offset))
- {
- offset += advance;
- let code_unit = *(src.add(offset));
-+ // Safety: uses safety-usable invariant on ascii_to_ascii_simd_double_stride to return
-+ // guaranteed non-ascii
- return Some((code_unit, offset));
- }
- offset += SIMD_STRIDE_SIZE * 2;
-+ // Safety: This is `offset > len - 2 * SIMD_STRIDE_SIZE` which means we always have at least `2 * SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride_times_two {
- break;
- }
- }
-+ // Safety: We're valid for reading/writing one more, and can still assume alignment
- if offset + SIMD_STRIDE_SIZE <= len {
- if !$stride_both_aligned(src.add(offset), dst.add(offset)) {
- break 'outer;
- }
- offset += SIMD_STRIDE_SIZE;
- }
- } else {
- loop {
-+ // Safety: only src is aligned here. We're valid for reading/writing double stride from the initial condition
-+ // and the loop break condition below
- if let Some(advance) =
- $double_stride_src_aligned(src.add(offset), dst.add(offset))
- {
- offset += advance;
- let code_unit = *(src.add(offset));
-+ // Safety: uses safety-usable invariant on ascii_to_ascii_simd_double_stride to return
-+ // guaranteed non-ascii
- return Some((code_unit, offset));
- }
- offset += SIMD_STRIDE_SIZE * 2;
-+ // Safety: This is `offset > len - 2 * SIMD_STRIDE_SIZE` which means we always have at least `2 * SIMD_STRIDE_SIZE` elements to munch next time.
-+
- if offset > len_minus_stride_times_two {
- break;
- }
- }
-+ // Safety: We're valid for reading/writing one more, and can still assume alignment
- if offset + SIMD_STRIDE_SIZE <= len {
- if !$stride_src_aligned(src.add(offset), dst.add(offset)) {
- break 'outer;
- }
- offset += SIMD_STRIDE_SIZE;
- }
- }
- } else {
- // At most two iterations, so unroll
- if offset + SIMD_STRIDE_SIZE <= len {
-+ // Safety: The check above ensures we're allowed to read/write this, and we don't use alignment
- if !$stride_neither_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
- if offset + SIMD_STRIDE_SIZE <= len {
-+ // Safety: The check above ensures we're allowed to read/write this, and we don't use alignment
- if !$stride_neither_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
- }
- }
- }
- }
- break 'outer;
- }
- while offset < len {
-+ // Safety: relies straightforwardly on the `len` invariant
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety-usable invariant upheld here
- return Some((code_unit, offset));
- }
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! latin1_simd_check_align {
- (
- $name:ident,
- $src_unit:ty,
- $dst_unit:ty,
-+ // Safety: This function must require aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_both_aligned:ident,
-+ // Safety: This function must require aligned/unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_src_aligned:ident,
-+ // Safety: This function must require unaligned/aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_dst_aligned:ident,
-+ // Safety: This function must require unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_neither_aligned:ident
-+
- ) => {
-+ /// Safety: src/dst must be valid for reads/writes of `len` elements of their units.
- #[inline(always)]
- pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading/writing at least `SIMD_STRIDE_SIZE` elements.
- if SIMD_STRIDE_SIZE <= len {
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
-+ // Whether dst is aligned
- let dst_masked = (dst as usize) & SIMD_ALIGNMENT_MASK;
-+ // Whether src is aligned
- if ((src as usize) & SIMD_ALIGNMENT_MASK) == 0 {
- if dst_masked == 0 {
- loop {
-+ // Safety: Both were aligned, we can use the aligned function
- $stride_both_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which means in the next iteration we're valid for
-+ // reading/writing at least SIMD_STRIDE_SIZE elements.
- if offset > len_minus_stride {
- break;
- }
- }
- } else {
- loop {
-+ // Safety: src was aligned, dst was not
- $stride_src_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which means in the next iteration we're valid for
-+ // reading/writing at least SIMD_STRIDE_SIZE elements.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- } else {
- if dst_masked == 0 {
- loop {
-+ // Safety: dst was aligned, src was not
- $stride_dst_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which means in the next iteration we're valid for
-+ // reading/writing at least SIMD_STRIDE_SIZE elements.
- if offset > len_minus_stride {
- break;
- }
- }
- } else {
- loop {
-+ // Safety: Neither were aligned
- $stride_neither_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which means in the next iteration we're valid for
-+ // reading/writing at least SIMD_STRIDE_SIZE elements.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- }
- }
- while offset < len {
-+ // Safety: relies straightforwardly on the `len` invariant
- let code_unit = *(src.add(offset));
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! latin1_simd_check_align_unrolled {
- (
- $name:ident,
- $src_unit:ty,
- $dst_unit:ty,
-+ // Safety: This function must require aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_both_aligned:ident,
-+ // Safety: This function must require aligned/unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_src_aligned:ident,
-+ // Safety: This function must require unaligned/aligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_dst_aligned:ident,
-+ // Safety: This function must require unaligned src/dest that are valid for reading/writing SIMD_STRIDE_SIZE src_unit/dst_unit
- $stride_neither_aligned:ident
- ) => {
-+ /// Safety: src/dst must be valid for reads/writes of `len` elements of their units.
- #[inline(always)]
- pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
- let unit_size = ::core::mem::size_of::<$src_unit>();
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading/writing at least `SIMD_STRIDE_SIZE` elements.
- if SIMD_STRIDE_SIZE <= len {
-+ // Safety: this correctly calculates the number of src_units that need to be read before the remaining list is aligned.
-+ // This is by definition less than SIMD_STRIDE_SIZE.
- let mut until_alignment = ((SIMD_STRIDE_SIZE
- - ((src as usize) & SIMD_ALIGNMENT_MASK))
- & SIMD_ALIGNMENT_MASK)
- / unit_size;
- while until_alignment != 0 {
-+ // Safety: This is a straightforward copy, since until_alignment is < SIMD_STRIDE_SIZE <= len, this is in-bounds
- *(dst.add(offset)) = *(src.add(offset)) as $dst_unit;
- offset += 1;
- until_alignment -= 1;
- }
-+ // Safety: here offset will be `until_alignment`, i.e. enough to align `src`.
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
-+ // Safety: if this check succeeds we're valid for reading/writing at least `2 * SIMD_STRIDE_SIZE` elements.
- if offset + SIMD_STRIDE_SIZE * 2 <= len {
- let len_minus_stride_times_two = len_minus_stride - SIMD_STRIDE_SIZE;
-+ // Safety: at this point src is known to be aligned at offset; dst alignment is not yet known, so it is checked here.
- if (dst.add(offset) as usize) & SIMD_ALIGNMENT_MASK == 0 {
- loop {
-+ // Safety: We checked alignment of dst above, we can use the alignment functions. We're allowed to read/write 2*SIMD_STRIDE_SIZE elements, which we do.
- $stride_both_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
- $stride_both_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - 2 * SIMD_STRIDE_SIZE` which means we always have at least `2 * SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride_times_two {
- break;
- }
- }
- } else {
- loop {
-+ // Safety: we ensured alignment of src already.
- $stride_src_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
- $stride_src_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - 2 * SIMD_STRIDE_SIZE` which means we always have at least `2 * SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride_times_two {
- break;
- }
- }
- }
- }
-+ // Safety: This is `offset < len - SIMD_STRIDE_SIZE` which means we are valid to munch SIMD_STRIDE_SIZE more elements, which we do
- if offset < len_minus_stride {
- $stride_src_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
- }
- }
- while offset < len {
-+ // Safety: uses len invariant here and below
- let code_unit = *(src.add(offset));
- // On x86_64, this loop autovectorizes but in the pack
- // case there are instructions whose purpose is to make sure
- // each u16 in the vector is truncated before packing. However,
- // since we don't care about saturating behavior of SSE2 packing
- // when the input isn't Latin1, those instructions are useless.
- // Unfortunately, using the `assume` intrinsic to lie to the
- // optimizer doesn't make LLVM omit the trunctation that we
-@@ -688,138 +869,180 @@ macro_rules! latin1_simd_check_align_unr
- offset += 1;
- }
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_simd_unalign {
-+ // Safety: stride_neither_aligned must be a function that requires src/dest be valid for unaligned reads/writes for SIMD_STRIDE_SIZE elements of type src_unit/dest_unit
- ($name:ident, $src_unit:ty, $dst_unit:ty, $stride_neither_aligned:ident) => {
-+ /// Safety: src and dst must be valid for reads/writes of len elements of type src_unit/dst_unit
-+ ///
-+ /// Safety-usable invariant: will return Some() when it encounters non-ASCII, with the first element in the Some being
-+ /// guaranteed to be non-ASCII (> 127), and the second being the offset where it is found
- #[inline(always)]
- pub unsafe fn $name(
- src: *const $src_unit,
- dst: *mut $dst_unit,
- len: usize,
- ) -> Option<($src_unit, usize)> {
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading/writing at least `stride` elements.
- if SIMD_STRIDE_SIZE <= len {
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
- loop {
-+ // Safety: We know we're valid for `stride` reads/writes, so we can call this function. We don't need alignment.
- if !$stride_neither_aligned(src.add(offset), dst.add(offset)) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
-+ // This is `offset > len - stride` which means we always have at least `stride` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- while offset < len {
-+ // Safety: Uses len invariant here and below
- let code_unit = *(src.add(offset));
- if code_unit > 127 {
-+ // Safety-usable invariant upheld here
- return Some((code_unit, offset));
- }
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! latin1_simd_unalign {
-+ // Safety: stride_neither_aligned must be a function that requires src/dest be valid for unaligned reads/writes for SIMD_STRIDE_SIZE elements of type src_unit/dest_unit
- ($name:ident, $src_unit:ty, $dst_unit:ty, $stride_neither_aligned:ident) => {
-+ /// Safety: src and dst must be valid for unaligned reads/writes of len elements of type src_unit/dst_unit
- #[inline(always)]
- pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) {
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading/writing at least `stride` elements.
- if SIMD_STRIDE_SIZE <= len {
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
- loop {
-+ // Safety: We know we're valid for `stride` reads/writes, so we can call this function. We don't need alignment.
- $stride_neither_aligned(src.add(offset), dst.add(offset));
- offset += SIMD_STRIDE_SIZE;
-+ // This is `offset > len - stride` which means we always have at least `stride` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- while offset < len {
-+ // Safety: Uses len invariant here
- let code_unit = *(src.add(offset));
- *(dst.add(offset)) = code_unit as $dst_unit;
- offset += 1;
- }
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_to_ascii_simd_stride {
-+ // Safety: load/store must be valid for 16 bytes of read/write, which may be unaligned. (candidates: `(load|store)(16|8)_(unaligned|aligned)` functions)
- ($name:ident, $load:ident, $store:ident) => {
-+ /// Safety: src and dst must be valid for 16 bytes of read/write according to
-+ /// the $load/$store fn, which may allow for unaligned reads/writes or require
-+ /// alignment to either 16x8 or u8x16.
- #[inline(always)]
- pub unsafe fn $name(src: *const u8, dst: *mut u8) -> bool {
- let simd = $load(src);
- if !simd_is_ascii(simd) {
- return false;
- }
- $store(dst, simd);
- true
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_to_ascii_simd_double_stride {
-+ // Safety: store must be valid for 32 bytes of write, which may be unaligned (candidates: `store(8|16)_(aligned|unaligned)`)
- ($name:ident, $store:ident) => {
-+ /// Safety: src must be valid for 32 bytes of aligned u8x16 read
-+ /// dst must be valid for 32 bytes of write according to
-+ /// the $store fn, which may allow for unaligned writes or require
-+ /// alignment to either 16x8 or u8x16.
-+ ///
-+ /// Safety-usable invariant: Returns Some(index) if the element at `index` is invalid ASCII
- #[inline(always)]
- pub unsafe fn $name(src: *const u8, dst: *mut u8) -> Option<usize> {
- let first = load16_aligned(src);
- let second = load16_aligned(src.add(SIMD_STRIDE_SIZE));
- $store(dst, first);
- if unlikely(!simd_is_ascii(first | second)) {
-+ // Safety: mask_ascii produces a mask of all the high bits.
- let mask_first = mask_ascii(first);
- if mask_first != 0 {
-+ // Safety: on little endian systems this will be the number of ascii bytes
-+ // before the first non-ascii, i.e. valid for indexing src
-+ // TODO SAFETY: What about big-endian systems?
- return Some(mask_first.trailing_zeros() as usize);
- }
- $store(dst.add(SIMD_STRIDE_SIZE), second);
- let mask_second = mask_ascii(second);
-+ // Safety: on little endian systems this will be the number of ascii bytes
-+ // before the first non-ascii, i.e. valid for indexing src
- return Some(SIMD_STRIDE_SIZE + mask_second.trailing_zeros() as usize);
- }
- $store(dst.add(SIMD_STRIDE_SIZE), second);
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_to_basic_latin_simd_stride {
-+ // Safety: load/store must be valid for 16 bytes of read/write, which may be unaligned. (candidates: `(load|store)(16|8)_(unaligned|aligned)` functions)
- ($name:ident, $load:ident, $store:ident) => {
-+ /// Safety: src and dst must be valid for 16/32 bytes of read/write according to
-+ /// the $load/$store fn, which may allow for unaligned reads/writes or require
-+ /// alignment to either 16x8 or u8x16.
- #[inline(always)]
- pub unsafe fn $name(src: *const u8, dst: *mut u16) -> bool {
- let simd = $load(src);
- if !simd_is_ascii(simd) {
- return false;
- }
- let (first, second) = simd_unpack(simd);
- $store(dst, first);
- $store(dst.add(8), second);
- true
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! ascii_to_basic_latin_simd_double_stride {
-+ // Safety: store must be valid for 16 bytes of write, which may be unaligned
- ($name:ident, $store:ident) => {
-+ /// Safety: src must be valid for 2*SIMD_STRIDE_SIZE bytes of aligned reads,
-+ /// aligned to either 16x8 or u8x16.
-+ /// dst must be valid for 2*SIMD_STRIDE_SIZE bytes of aligned or unaligned writes
- #[inline(always)]
- pub unsafe fn $name(src: *const u8, dst: *mut u16) -> Option<usize> {
- let first = load16_aligned(src);
- let second = load16_aligned(src.add(SIMD_STRIDE_SIZE));
- let (a, b) = simd_unpack(first);
- $store(dst, a);
-+ // Safety: divide by 2 since it's a u16 pointer
- $store(dst.add(SIMD_STRIDE_SIZE / 2), b);
- if unlikely(!simd_is_ascii(first | second)) {
- let mask_first = mask_ascii(first);
- if mask_first != 0 {
- return Some(mask_first.trailing_zeros() as usize);
- }
- let (c, d) = simd_unpack(second);
- $store(dst.add(SIMD_STRIDE_SIZE), c);
-@@ -832,47 +1055,59 @@ macro_rules! ascii_to_basic_latin_simd_d
- $store(dst.add(SIMD_STRIDE_SIZE + (SIMD_STRIDE_SIZE / 2)), d);
- None
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! unpack_simd_stride {
-+ // Safety: load/store must be valid for 16 bytes of read/write, which may be unaligned. (candidates: `(load|store)(16|8)_(unaligned|aligned)` functions)
- ($name:ident, $load:ident, $store:ident) => {
-+ /// Safety: src and dst must be valid for 16 bytes of read/write according to
-+ /// the $load/$store fn, which may allow for unaligned reads/writes or require
-+ /// alignment to either 16x8 or u8x16.
- #[inline(always)]
- pub unsafe fn $name(src: *const u8, dst: *mut u16) {
- let simd = $load(src);
- let (first, second) = simd_unpack(simd);
- $store(dst, first);
- $store(dst.add(8), second);
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! basic_latin_to_ascii_simd_stride {
-+ // Safety: load/store must be valid for 16 bytes of read/write, which may be unaligned. (candidates: `(load|store)(16|8)_(unaligned|aligned)` functions)
- ($name:ident, $load:ident, $store:ident) => {
-+ /// Safety: src and dst must be valid for 32/16 bytes of read/write according to
-+ /// the $load/$store fn, which may allow for unaligned reads/writes or require
-+ /// alignment to either 16x8 or u8x16.
- #[inline(always)]
- pub unsafe fn $name(src: *const u16, dst: *mut u8) -> bool {
- let first = $load(src);
- let second = $load(src.add(8));
- if simd_is_basic_latin(first | second) {
- $store(dst, simd_pack(first, second));
- true
- } else {
- false
- }
- }
- };
- }
-
- #[allow(unused_macros)]
- macro_rules! pack_simd_stride {
-+ // Safety: load/store must be valid for 16 bytes of read/write, which may be unaligned. (candidates: `(load|store)(16|8)_(unaligned|aligned)` functions)
- ($name:ident, $load:ident, $store:ident) => {
-+ /// Safety: src and dst must be valid for 32/16 bytes of read/write according to
-+ /// the $load/$store fn, which may allow for unaligned reads/writes or require
-+ /// alignment to either 16x8 or u8x16.
- #[inline(always)]
- pub unsafe fn $name(src: *const u16, dst: *mut u8) {
- let first = $load(src);
- let second = $load(src.add(8));
- $store(dst, simd_pack(first, second));
- }
- };
- }
-@@ -888,24 +1123,28 @@ cfg_if! {
- // pub const ALIGNMENT: usize = 8;
-
- pub const ALU_STRIDE_SIZE: usize = 16;
-
- pub const ALU_ALIGNMENT: usize = 8;
-
- pub const ALU_ALIGNMENT_MASK: usize = 7;
-
-+ // Safety for stride macros: We stick to the load8_aligned/etc family of functions. We consistently produce
-+ // neither_aligned variants using only unaligned inputs.
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_neither_aligned, load16_unaligned, store16_unaligned);
-
- ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_neither_aligned, load16_unaligned, store8_unaligned);
- unpack_simd_stride!(unpack_stride_neither_aligned, load16_unaligned, store8_unaligned);
-
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_neither_aligned, load8_unaligned, store16_unaligned);
- pack_simd_stride!(pack_stride_neither_aligned, load8_unaligned, store16_unaligned);
-
-+ // Safety for conversion macros: We use the unalign macro with unalign functions above. All stride functions were produced
-+ // by stride macros that universally munch a single SIMD_STRIDE_SIZE worth of elements.
- ascii_simd_unalign!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_neither_aligned);
- ascii_simd_unalign!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_neither_aligned);
- ascii_simd_unalign!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_neither_aligned);
- latin1_simd_unalign!(unpack_latin1, u8, u16, unpack_stride_neither_aligned);
- latin1_simd_unalign!(pack_latin1, u16, u8, pack_stride_neither_aligned);
- } else if #[cfg(all(feature = "simd-accel", target_endian = "little", target_feature = "neon"))] {
- // SIMD with different instructions for aligned and unaligned loads and stores.
- //
-@@ -914,16 +1153,19 @@ cfg_if! {
- // but the benchmark results I see don't agree.
-
- pub const SIMD_STRIDE_SIZE: usize = 16;
-
- pub const MAX_STRIDE_SIZE: usize = 16;
-
- pub const SIMD_ALIGNMENT_MASK: usize = 15;
-
-+ // Safety for stride macros: We stick to the load8_aligned/etc family of functions. We consistently name
-+ // aligned/unaligned functions according to src/dst being aligned/unaligned
-+
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_both_aligned, load16_aligned, store16_aligned);
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_src_aligned, load16_aligned, store16_unaligned);
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_dst_aligned, load16_unaligned, store16_aligned);
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_neither_aligned, load16_unaligned, store16_unaligned);
-
- ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_both_aligned, load16_aligned, store8_aligned);
- ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_src_aligned, load16_aligned, store8_unaligned);
- ascii_to_basic_latin_simd_stride!(ascii_to_basic_latin_stride_dst_aligned, load16_unaligned, store8_aligned);
-@@ -939,36 +1181,43 @@ cfg_if! {
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_dst_aligned, load8_unaligned, store16_aligned);
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_neither_aligned, load8_unaligned, store16_unaligned);
-
- pack_simd_stride!(pack_stride_both_aligned, load8_aligned, store16_aligned);
- pack_simd_stride!(pack_stride_src_aligned, load8_aligned, store16_unaligned);
- pack_simd_stride!(pack_stride_dst_aligned, load8_unaligned, store16_aligned);
- pack_simd_stride!(pack_stride_neither_aligned, load8_unaligned, store16_unaligned);
-
-+ // Safety for conversion macros: We use the correct pattern of both/src/dst/neither here. All stride functions were produced
-+ // by stride macros that universally munch a single SIMD_STRIDE_SIZE worth of elements.
-+
- ascii_simd_check_align!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_both_aligned, ascii_to_ascii_stride_src_aligned, ascii_to_ascii_stride_dst_aligned, ascii_to_ascii_stride_neither_aligned);
- ascii_simd_check_align!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_both_aligned, ascii_to_basic_latin_stride_src_aligned, ascii_to_basic_latin_stride_dst_aligned, ascii_to_basic_latin_stride_neither_aligned);
- ascii_simd_check_align!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_both_aligned, basic_latin_to_ascii_stride_src_aligned, basic_latin_to_ascii_stride_dst_aligned, basic_latin_to_ascii_stride_neither_aligned);
- latin1_simd_check_align!(unpack_latin1, u8, u16, unpack_stride_both_aligned, unpack_stride_src_aligned, unpack_stride_dst_aligned, unpack_stride_neither_aligned);
- latin1_simd_check_align!(pack_latin1, u16, u8, pack_stride_both_aligned, pack_stride_src_aligned, pack_stride_dst_aligned, pack_stride_neither_aligned);
- } else if #[cfg(all(feature = "simd-accel", target_feature = "sse2"))] {
- // SIMD with different instructions for aligned and unaligned loads and stores.
- //
- // Newer microarchitectures are not supposed to have a performance difference between
- // aligned and unaligned SSE2 loads and stores when the address is actually aligned,
- // but the benchmark results I see don't agree.
-
- pub const SIMD_STRIDE_SIZE: usize = 16;
-
-+ /// Safety-usable invariant: This should be identical to SIMD_STRIDE_SIZE (used by ascii_simd_check_align_unrolled)
- pub const SIMD_ALIGNMENT: usize = 16;
-
- pub const MAX_STRIDE_SIZE: usize = 16;
-
- pub const SIMD_ALIGNMENT_MASK: usize = 15;
-
-+ // Safety for stride macros: We stick to the load8_aligned/etc family of functions. We consistently name
-+ // aligned/unaligned functions according to src/dst being aligned/unaligned
-+
- ascii_to_ascii_simd_double_stride!(ascii_to_ascii_simd_double_stride_both_aligned, store16_aligned);
- ascii_to_ascii_simd_double_stride!(ascii_to_ascii_simd_double_stride_src_aligned, store16_unaligned);
-
- ascii_to_basic_latin_simd_double_stride!(ascii_to_basic_latin_simd_double_stride_both_aligned, store8_aligned);
- ascii_to_basic_latin_simd_double_stride!(ascii_to_basic_latin_simd_double_stride_src_aligned, store8_unaligned);
-
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_both_aligned, load16_aligned, store16_aligned);
- ascii_to_ascii_simd_stride!(ascii_to_ascii_stride_src_aligned, load16_aligned, store16_unaligned);
-@@ -984,33 +1233,43 @@ cfg_if! {
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_both_aligned, load8_aligned, store16_aligned);
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_src_aligned, load8_aligned, store16_unaligned);
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_dst_aligned, load8_unaligned, store16_aligned);
- basic_latin_to_ascii_simd_stride!(basic_latin_to_ascii_stride_neither_aligned, load8_unaligned, store16_unaligned);
-
- pack_simd_stride!(pack_stride_both_aligned, load8_aligned, store16_aligned);
- pack_simd_stride!(pack_stride_src_aligned, load8_aligned, store16_unaligned);
-
-+ // Safety for conversion macros: We use the correct pattern of both/src/dst/neither/double_both/double_src here. All stride functions were produced
-+ // by stride macros that universally munch a single SIMD_STRIDE_SIZE worth of elements.
-+
- ascii_simd_check_align_unrolled!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_both_aligned, ascii_to_ascii_stride_src_aligned, ascii_to_ascii_stride_neither_aligned, ascii_to_ascii_simd_double_stride_both_aligned, ascii_to_ascii_simd_double_stride_src_aligned);
- ascii_simd_check_align_unrolled!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_both_aligned, ascii_to_basic_latin_stride_src_aligned, ascii_to_basic_latin_stride_neither_aligned, ascii_to_basic_latin_simd_double_stride_both_aligned, ascii_to_basic_latin_simd_double_stride_src_aligned);
-
- ascii_simd_check_align!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_both_aligned, basic_latin_to_ascii_stride_src_aligned, basic_latin_to_ascii_stride_dst_aligned, basic_latin_to_ascii_stride_neither_aligned);
- latin1_simd_check_align_unrolled!(unpack_latin1, u8, u16, unpack_stride_both_aligned, unpack_stride_src_aligned, unpack_stride_dst_aligned, unpack_stride_neither_aligned);
- latin1_simd_check_align_unrolled!(pack_latin1, u16, u8, pack_stride_both_aligned, pack_stride_src_aligned, pack_stride_dst_aligned, pack_stride_neither_aligned);
- } else if #[cfg(all(target_endian = "little", target_pointer_width = "64"))] {
- // Aligned ALU word, little-endian, 64-bit
-
-+ /// Safety invariant: this is the amount of bytes consumed by
-+ /// unpack_alu. This will be twice the pointer width, as it consumes two usizes.
-+ /// This is also the number of bytes produced by pack_alu.
-+ /// This is also the number of u16 code units produced/consumed by unpack_alu/pack_alu respectively.
- pub const ALU_STRIDE_SIZE: usize = 16;
-
- pub const MAX_STRIDE_SIZE: usize = 16;
-
-+ // Safety invariant: this is the pointer width in bytes
- pub const ALU_ALIGNMENT: usize = 8;
-
-+ // Safety invariant: this is a mask for getting the bits of a pointer not aligned to ALU_ALIGNMENT
- pub const ALU_ALIGNMENT_MASK: usize = 7;
-
-+ /// Safety: dst must point to valid space for writing four `usize`s
- #[inline(always)]
- unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
- let first = ((0x0000_0000_FF00_0000usize & word) << 24) |
- ((0x0000_0000_00FF_0000usize & word) << 16) |
- ((0x0000_0000_0000_FF00usize & word) << 8) |
- (0x0000_0000_0000_00FFusize & word);
- let second = ((0xFF00_0000_0000_0000usize & word) >> 8) |
- ((0x00FF_0000_0000_0000usize & word) >> 16) |
-@@ -1019,22 +1278,24 @@ cfg_if! {
- let third = ((0x0000_0000_FF00_0000usize & second_word) << 24) |
- ((0x0000_0000_00FF_0000usize & second_word) << 16) |
- ((0x0000_0000_0000_FF00usize & second_word) << 8) |
- (0x0000_0000_0000_00FFusize & second_word);
- let fourth = ((0xFF00_0000_0000_0000usize & second_word) >> 8) |
- ((0x00FF_0000_0000_0000usize & second_word) >> 16) |
- ((0x0000_FF00_0000_0000usize & second_word) >> 24) |
- ((0x0000_00FF_0000_0000usize & second_word) >> 32);
-+ // Safety: fn invariant used here
- *dst = first;
- *(dst.add(1)) = second;
- *(dst.add(2)) = third;
- *(dst.add(3)) = fourth;
- }
-
-+ /// Safety: dst must point to valid space for writing two `usize`s
- #[inline(always)]
- unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
- let word = ((0x00FF_0000_0000_0000usize & second) << 8) |
- ((0x0000_00FF_0000_0000usize & second) << 16) |
- ((0x0000_0000_00FF_0000usize & second) << 24) |
- ((0x0000_0000_0000_00FFusize & second) << 32) |
- ((0x00FF_0000_0000_0000usize & first) >> 24) |
- ((0x0000_00FF_0000_0000usize & first) >> 16) |
-@@ -1043,70 +1304,88 @@ cfg_if! {
- let second_word = ((0x00FF_0000_0000_0000usize & fourth) << 8) |
- ((0x0000_00FF_0000_0000usize & fourth) << 16) |
- ((0x0000_0000_00FF_0000usize & fourth) << 24) |
- ((0x0000_0000_0000_00FFusize & fourth) << 32) |
- ((0x00FF_0000_0000_0000usize & third) >> 24) |
- ((0x0000_00FF_0000_0000usize & third) >> 16) |
- ((0x0000_0000_00FF_0000usize & third) >> 8) |
- (0x0000_0000_0000_00FFusize & third);
-+ // Safety: fn invariant used here
- *dst = word;
- *(dst.add(1)) = second_word;
- }
- } else if #[cfg(all(target_endian = "little", target_pointer_width = "32"))] {
- // Aligned ALU word, little-endian, 32-bit
-
-+ /// Safety invariant: this is the amount of bytes consumed by
-+ /// unpack_alu. This will be twice the pointer width, as it consumes two usizes.
-+ /// This is also the number of bytes produced by pack_alu.
-+ /// This is also the number of u16 code units produced/consumed by unpack_alu/pack_alu respectively.
- pub const ALU_STRIDE_SIZE: usize = 8;
-
- pub const MAX_STRIDE_SIZE: usize = 8;
-
-+ // Safety invariant: this is the pointer width in bytes
- pub const ALU_ALIGNMENT: usize = 4;
-
-+ // Safety invariant: this is a mask for getting the bits of a pointer not aligned to ALU_ALIGNMENT
- pub const ALU_ALIGNMENT_MASK: usize = 3;
-
-+ /// Safety: dst must point to valid space for writing four `usize`s
- #[inline(always)]
- unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
- let first = ((0x0000_FF00usize & word) << 8) |
- (0x0000_00FFusize & word);
- let second = ((0xFF00_0000usize & word) >> 8) |
- ((0x00FF_0000usize & word) >> 16);
- let third = ((0x0000_FF00usize & second_word) << 8) |
- (0x0000_00FFusize & second_word);
- let fourth = ((0xFF00_0000usize & second_word) >> 8) |
- ((0x00FF_0000usize & second_word) >> 16);
-+ // Safety: fn invariant used here
- *dst = first;
- *(dst.add(1)) = second;
- *(dst.add(2)) = third;
- *(dst.add(3)) = fourth;
- }
-
-+ /// Safety: dst must point to valid space for writing two `usize`s
- #[inline(always)]
- unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
- let word = ((0x00FF_0000usize & second) << 8) |
- ((0x0000_00FFusize & second) << 16) |
- ((0x00FF_0000usize & first) >> 8) |
- (0x0000_00FFusize & first);
- let second_word = ((0x00FF_0000usize & fourth) << 8) |
- ((0x0000_00FFusize & fourth) << 16) |
- ((0x00FF_0000usize & third) >> 8) |
- (0x0000_00FFusize & third);
-+ // Safety: fn invariant used here
- *dst = word;
- *(dst.add(1)) = second_word;
- }
- } else if #[cfg(all(target_endian = "big", target_pointer_width = "64"))] {
- // Aligned ALU word, big-endian, 64-bit
-
-+ /// Safety invariant: this is the amount of bytes consumed by
-+ /// unpack_alu. This will be twice the pointer width, as it consumes two usizes.
-+ /// This is also the number of bytes produced by pack_alu.
-+ /// This is also the number of u16 code units produced/consumed by unpack_alu/pack_alu respectively.
- pub const ALU_STRIDE_SIZE: usize = 16;
-
- pub const MAX_STRIDE_SIZE: usize = 16;
-
-+ // Safety invariant: this is the pointer width in bytes
- pub const ALU_ALIGNMENT: usize = 8;
-
-+ // Safety invariant: this is a mask for getting the bits of a pointer not aligned to ALU_ALIGNMENT
- pub const ALU_ALIGNMENT_MASK: usize = 7;
-
-+ /// Safety: dst must point to valid space for writing four `usize`s
- #[inline(always)]
- unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
- let first = ((0xFF00_0000_0000_0000usize & word) >> 8) |
- ((0x00FF_0000_0000_0000usize & word) >> 16) |
- ((0x0000_FF00_0000_0000usize & word) >> 24) |
- ((0x0000_00FF_0000_0000usize & word) >> 32);
- let second = ((0x0000_0000_FF00_0000usize & word) << 24) |
- ((0x0000_0000_00FF_0000usize & word) << 16) |
-@@ -1115,22 +1394,24 @@ cfg_if! {
- let third = ((0xFF00_0000_0000_0000usize & second_word) >> 8) |
- ((0x00FF_0000_0000_0000usize & second_word) >> 16) |
- ((0x0000_FF00_0000_0000usize & second_word) >> 24) |
- ((0x0000_00FF_0000_0000usize & second_word) >> 32);
- let fourth = ((0x0000_0000_FF00_0000usize & second_word) << 24) |
- ((0x0000_0000_00FF_0000usize & second_word) << 16) |
- ((0x0000_0000_0000_FF00usize & second_word) << 8) |
- (0x0000_0000_0000_00FFusize & second_word);
-+ // Safety: fn invariant used here
- *dst = first;
- *(dst.add(1)) = second;
- *(dst.add(2)) = third;
- *(dst.add(3)) = fourth;
- }
-
-+ /// Safety: dst must point to valid space for writing two `usize`s
- #[inline(always)]
- unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
- let word = ((0x00FF0000_00000000usize & first) << 8) |
- ((0x000000FF_00000000usize & first) << 16) |
- ((0x00000000_00FF0000usize & first) << 24) |
- ((0x00000000_000000FFusize & first) << 32) |
- ((0x00FF0000_00000000usize & second) >> 24) |
- ((0x000000FF_00000000usize & second) >> 16) |
-@@ -1139,67 +1420,80 @@ cfg_if! {
- let second_word = ((0x00FF0000_00000000usize & third) << 8) |
- ((0x000000FF_00000000usize & third) << 16) |
- ((0x00000000_00FF0000usize & third) << 24) |
- ((0x00000000_000000FFusize & third) << 32) |
- ((0x00FF0000_00000000usize & fourth) >> 24) |
- ((0x000000FF_00000000usize & fourth) >> 16) |
- ((0x00000000_00FF0000usize & fourth) >> 8) |
- (0x00000000_000000FFusize & fourth);
-+ // Safety: fn invariant used here
- *dst = word;
- *(dst.add(1)) = second_word;
- }
- } else if #[cfg(all(target_endian = "big", target_pointer_width = "32"))] {
- // Aligned ALU word, big-endian, 32-bit
-
-+ /// Safety invariant: this is the amount of bytes consumed by
-+ /// unpack_alu. This will be twice the pointer width, as it consumes two usizes.
-+ /// This is also the number of bytes produced by pack_alu.
-+ /// This is also the number of u16 code units produced/consumed by unpack_alu/pack_alu respectively.
- pub const ALU_STRIDE_SIZE: usize = 8;
-
- pub const MAX_STRIDE_SIZE: usize = 8;
-
-+ // Safety invariant: this is the pointer width in bytes
- pub const ALU_ALIGNMENT: usize = 4;
-
-+ // Safety invariant: this is a mask for getting the bits of a pointer not aligned to ALU_ALIGNMENT
- pub const ALU_ALIGNMENT_MASK: usize = 3;
-
-+ /// Safety: dst must point to valid space for writing four `usize`s
- #[inline(always)]
- unsafe fn unpack_alu(word: usize, second_word: usize, dst: *mut usize) {
- let first = ((0xFF00_0000usize & word) >> 8) |
- ((0x00FF_0000usize & word) >> 16);
- let second = ((0x0000_FF00usize & word) << 8) |
- (0x0000_00FFusize & word);
- let third = ((0xFF00_0000usize & second_word) >> 8) |
- ((0x00FF_0000usize & second_word) >> 16);
- let fourth = ((0x0000_FF00usize & second_word) << 8) |
- (0x0000_00FFusize & second_word);
-+ // Safety: fn invariant used here
- *dst = first;
- *(dst.add(1)) = second;
- *(dst.add(2)) = third;
- *(dst.add(3)) = fourth;
- }
-
-+ /// Safety: dst must point to valid space for writing two `usize`s
- #[inline(always)]
- unsafe fn pack_alu(first: usize, second: usize, third: usize, fourth: usize, dst: *mut usize) {
- let word = ((0x00FF_0000usize & first) << 8) |
- ((0x0000_00FFusize & first) << 16) |
- ((0x00FF_0000usize & second) >> 8) |
- (0x0000_00FFusize & second);
- let second_word = ((0x00FF_0000usize & third) << 8) |
- ((0x0000_00FFusize & third) << 16) |
- ((0x00FF_0000usize & fourth) >> 8) |
- (0x0000_00FFusize & fourth);
-+ // Safety: fn invariant used here
- *dst = word;
- *(dst.add(1)) = second_word;
- }
- } else {
- ascii_naive!(ascii_to_ascii, u8, u8);
- ascii_naive!(ascii_to_basic_latin, u8, u16);
- ascii_naive!(basic_latin_to_ascii, u16, u8);
- }
- }
-
- cfg_if! {
-+ // Safety-usable invariant: this counts the zeroes from the "first byte" of utf-8 data packed into a usize
-+ // with the target endianness
- if #[cfg(target_endian = "little")] {
- #[allow(dead_code)]
- #[inline(always)]
- fn count_zeros(word: usize) -> u32 {
- word.trailing_zeros()
- }
- } else {
- #[allow(dead_code)]
-@@ -1207,208 +1501,272 @@ cfg_if! {
- fn count_zeros(word: usize) -> u32 {
- word.leading_zeros()
- }
- }
- }
-
- cfg_if! {
- if #[cfg(all(feature = "simd-accel", target_endian = "little", target_arch = "disabled"))] {
-+ /// Safety-usable invariant: Will return the value and position of the first non-ASCII byte in the slice in a Some if found.
-+ /// In other words, the first element of the Some is always `> 127`
- #[inline(always)]
- pub fn validate_ascii(slice: &[u8]) -> Option<(u8, usize)> {
- let src = slice.as_ptr();
- let len = slice.len();
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading at least `stride` elements.
- if SIMD_STRIDE_SIZE <= len {
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
- loop {
-+ // Safety: src at offset is valid for a `SIMD_STRIDE_SIZE` read
- let simd = unsafe { load16_unaligned(src.add(offset)) };
- if !simd_is_ascii(simd) {
- break;
- }
- offset += SIMD_STRIDE_SIZE;
-+ // This is `offset > len - SIMD_STRIDE_SIZE` which means we always have at least `SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- while offset < len {
- let code_unit = slice[offset];
- if code_unit > 127 {
-+ // Safety: Safety-usable invariant upheld here
- return Some((code_unit, offset));
- }
- offset += 1;
- }
- None
- }
- } else if #[cfg(all(feature = "simd-accel", target_feature = "sse2"))] {
-+ /// Safety-usable invariant: will return Some() when it encounters non-ASCII, with the first element in the Some being
-+ /// guaranteed to be non-ASCII (> 127), and the second being the offset where it is found
- #[inline(always)]
- pub fn validate_ascii(slice: &[u8]) -> Option<(u8, usize)> {
- let src = slice.as_ptr();
- let len = slice.len();
- let mut offset = 0usize;
-+ // Safety: if this check succeeds we're valid for reading at least `stride` elements.
- if SIMD_STRIDE_SIZE <= len {
- // First, process one unaligned vector
-+ // Safety: src is valid for a `SIMD_STRIDE_SIZE` read
- let simd = unsafe { load16_unaligned(src) };
- let mask = mask_ascii(simd);
- if mask != 0 {
- offset = mask.trailing_zeros() as usize;
- let non_ascii = unsafe { *src.add(offset) };
- return Some((non_ascii, offset));
- }
- offset = SIMD_STRIDE_SIZE;
-+ // Safety: Now that offset has changed we don't yet know how much it is valid for
-
- // We have now seen 16 ASCII bytes. Let's guess that
- // there will be enough more to justify more expense
- // in the case of non-ASCII.
- // Use aligned reads for the sake of old microachitectures.
-+ // Safety: this correctly calculates the number of src_units that need to be read before the remaining list is aligned.
-+ // This is by definition less than SIMD_ALIGNMENT, which is defined to be equal to SIMD_STRIDE_SIZE.
- let until_alignment = unsafe { (SIMD_ALIGNMENT - ((src.add(offset) as usize) & SIMD_ALIGNMENT_MASK)) & SIMD_ALIGNMENT_MASK };
- // This addition won't overflow, because even in the 32-bit PAE case the
- // address space holds enough code that the slice length can't be that
- // close to address space size.
- // offset now equals SIMD_STRIDE_SIZE, hence times 3 below.
-+ //
-+ // Safety: if this check succeeds we're valid for reading at least `2 * SIMD_STRIDE_SIZE` elements plus `until_alignment`.
-+ // The extra SIMD_STRIDE_SIZE in the condition is because `offset` is already `SIMD_STRIDE_SIZE`.
- if until_alignment + (SIMD_STRIDE_SIZE * 3) <= len {
- if until_alignment != 0 {
-+ // Safety: this is safe to call since we're valid for this read (and more), and don't care about alignment
-+ // Some bytes will be checked twice since `offset` is advanced by `until_alignment` rather than by SIMD_STRIDE_SIZE. This is fine.
- let simd = unsafe { load16_unaligned(src.add(offset)) };
- let mask = mask_ascii(simd);
- if mask != 0 {
- offset += mask.trailing_zeros() as usize;
- let non_ascii = unsafe { *src.add(offset) };
- return Some((non_ascii, offset));
- }
- offset += until_alignment;
- }
-+ // Safety: At this point we're valid for reading 2*SIMD_STRIDE_SIZE elements
-+ // Safety: Now `offset` is aligned for `src`
- let len_minus_stride_times_two = len - (SIMD_STRIDE_SIZE * 2);
- loop {
-+ // Safety: We were valid for this read, and were aligned.
- let first = unsafe { load16_aligned(src.add(offset)) };
- let second = unsafe { load16_aligned(src.add(offset + SIMD_STRIDE_SIZE)) };
- if !simd_is_ascii(first | second) {
-+ // Safety: mask_ascii produces a mask of all the high bits.
- let mask_first = mask_ascii(first);
- if mask_first != 0 {
-+ // Safety: on little endian systems this will be the number of ascii bytes
-+ // before the first non-ascii, i.e. valid for indexing src
-+ // TODO SAFETY: What about big-endian systems?
- offset += mask_first.trailing_zeros() as usize;
- } else {
- let mask_second = mask_ascii(second);
-+ // Safety: on little endian systems this will be the number of ascii bytes
-+ // before the first non-ascii, i.e. valid for indexing src
- offset += SIMD_STRIDE_SIZE + mask_second.trailing_zeros() as usize;
- }
-+ // Safety: We know this is non-ASCII, and can uphold the safety-usable invariant here
- let non_ascii = unsafe { *src.add(offset) };
-+
- return Some((non_ascii, offset));
- }
- offset += SIMD_STRIDE_SIZE * 2;
-+ // Safety: This is `offset > len - 2 * SIMD_STRIDE_SIZE` which means we always have at least `2 * SIMD_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride_times_two {
- break;
- }
- }
-+ // Safety: if this check succeeds we're valid for reading at least `SIMD_STRIDE_SIZE`
- if offset + SIMD_STRIDE_SIZE <= len {
-- let simd = unsafe { load16_aligned(src.add(offset)) };
-- let mask = mask_ascii(simd);
-+ // Safety: We were valid for this read, and were aligned.
-+ let simd = unsafe { load16_aligned(src.add(offset)) };
-+ // Safety: mask_ascii produces a mask of all the high bits.
-+ let mask = mask_ascii(simd);
- if mask != 0 {
-+ // Safety: on little endian systems this will be the number of ascii bytes
-+ // before the first non-ascii, i.e. valid for indexing src
- offset += mask.trailing_zeros() as usize;
- let non_ascii = unsafe { *src.add(offset) };
-+ // Safety: We know this is non-ASCII, and can uphold the safety-usable invariant here
- return Some((non_ascii, offset));
- }
- offset += SIMD_STRIDE_SIZE;
- }
- } else {
-+ // Safety: this is the unaligned branch
- // At most two iterations, so unroll
-+ // Safety: if this check succeeds we're valid for reading at least `SIMD_STRIDE_SIZE`
- if offset + SIMD_STRIDE_SIZE <= len {
-+ // Safety: We're valid for this read but must use an unaligned read
- let simd = unsafe { load16_unaligned(src.add(offset)) };
- let mask = mask_ascii(simd);
- if mask != 0 {
- offset += mask.trailing_zeros() as usize;
- let non_ascii = unsafe { *src.add(offset) };
-+ // Safety-usable invariant upheld here (same as above)
- return Some((non_ascii, offset));
- }
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: if this check succeeds we're valid for reading at least `SIMD_STRIDE_SIZE`
- if offset + SIMD_STRIDE_SIZE <= len {
-+ // Safety: We're valid for this read but must use an unaligned read
- let simd = unsafe { load16_unaligned(src.add(offset)) };
- let mask = mask_ascii(simd);
- if mask != 0 {
- offset += mask.trailing_zeros() as usize;
- let non_ascii = unsafe { *src.add(offset) };
-+ // Safety-usable invariant upheld here (same as above)
- return Some((non_ascii, offset));
- }
- offset += SIMD_STRIDE_SIZE;
- }
- }
- }
- }
- while offset < len {
-+ // Safety: relies straightforwardly on the `len` invariant
- let code_unit = unsafe { *(src.add(offset)) };
- if code_unit > 127 {
-+ // Safety-usable invariant upheld here
- return Some((code_unit, offset));
- }
- offset += 1;
- }
- None
- }
- } else {
-+ // Safety-usable invariant: returns byte index of first non-ascii byte
- #[inline(always)]
- fn find_non_ascii(word: usize, second_word: usize) -> Option<usize> {
- let word_masked = word & ASCII_MASK;
- let second_masked = second_word & ASCII_MASK;
- if (word_masked | second_masked) == 0 {
-+ // Both are ascii, invariant upheld
- return None;
- }
- if word_masked != 0 {
- let zeros = count_zeros(word_masked);
-- // `zeros` now contains 7 (for the seven bits of non-ASCII)
-+ // `zeros` now contains 0 to 7 (for the seven bits of masked ASCII in little endian,
-+ // or up to 7 bits of non-ASCII in big endian if the first byte is non-ASCII)
- // plus 8 times the number of ASCII in text order before the
- // non-ASCII byte in the little-endian case or 8 times the number of ASCII in
- // text order before the non-ASCII byte in the big-endian case.
- let num_ascii = (zeros >> 3) as usize;
-+ // Safety-usable invariant upheld here
- return Some(num_ascii);
- }
- let zeros = count_zeros(second_masked);
-- // `zeros` now contains 7 (for the seven bits of non-ASCII)
-+ // `zeros` now contains 0 to 7 (for the seven bits of masked ASCII in little endian,
-+ // or up to 7 bits of non-ASCII in big endian if the first byte is non-ASCII)
- // plus 8 times the number of ASCII in text order before the
- // non-ASCII byte in the little-endian case or 8 times the number of ASCII in
- // text order before the non-ASCII byte in the big-endian case.
- let num_ascii = (zeros >> 3) as usize;
-+ // Safety-usable invariant upheld here
- Some(ALU_ALIGNMENT + num_ascii)
- }
-
-+ /// Safety: `src` must be valid for the reads of two `usize`s
-+ ///
-+ /// Safety-usable invariant: will return byte index of first non-ascii byte
- #[inline(always)]
- unsafe fn validate_ascii_stride(src: *const usize) -> Option<usize> {
- let word = *src;
- let second_word = *(src.add(1));
- find_non_ascii(word, second_word)
- }
-
-+ /// Safety-usable invariant: will return Some() when it encounters non-ASCII, with the first element in the Some being
-+ /// guaranteed to be non-ASCII (> 127), and the second being the offset where it is found
- #[cfg_attr(feature = "cargo-clippy", allow(cast_ptr_alignment))]
- #[inline(always)]
- pub fn validate_ascii(slice: &[u8]) -> Option<(u8, usize)> {
- let src = slice.as_ptr();
- let len = slice.len();
- let mut offset = 0usize;
- let mut until_alignment = (ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK)) & ALU_ALIGNMENT_MASK;
-+ // Safety: If this check succeeds we're valid to read `until_alignment + ALU_STRIDE_SIZE` elements
- if until_alignment + ALU_STRIDE_SIZE <= len {
- while until_alignment != 0 {
- let code_unit = slice[offset];
- if code_unit > 127 {
-+ // Safety-usable invariant upheld here
- return Some((code_unit, offset));
- }
- offset += 1;
- until_alignment -= 1;
- }
-+ // Safety: At this point we have read until_alignment elements and
-+ // are valid for `ALU_STRIDE_SIZE` more.
- let len_minus_stride = len - ALU_STRIDE_SIZE;
- loop {
-+ // Safety: we were valid for this read
- let ptr = unsafe { src.add(offset) as *const usize };
- if let Some(num_ascii) = unsafe { validate_ascii_stride(ptr) } {
- offset += num_ascii;
-+ // Safety-usable invariant upheld here using the invariant from validate_ascii_stride()
- return Some((unsafe { *(src.add(offset)) }, offset));
- }
- offset += ALU_STRIDE_SIZE;
-+ // Safety: This is `offset > len - ALU_STRIDE_SIZE`; if we don't break, we always have at least `ALU_STRIDE_SIZE` elements to munch next time.
- if offset > len_minus_stride {
- break;
- }
- }
- }
- while offset < len {
- let code_unit = slice[offset];
- if code_unit > 127 {
-+ // Safety-usable invariant upheld here
- return Some((code_unit, offset));
- }
- offset += 1;
- }
- None
- }
-
- }
-@@ -1423,70 +1781,88 @@ cfg_if! {
- // vector reads without vector writes.
-
- pub const ALU_STRIDE_SIZE: usize = 8;
-
- pub const ALU_ALIGNMENT: usize = 4;
-
- pub const ALU_ALIGNMENT_MASK: usize = 3;
- } else {
-+ // Safety: src points to two valid `usize`s, dst points to four valid `usize`s
- #[inline(always)]
- unsafe fn unpack_latin1_stride_alu(src: *const usize, dst: *mut usize) {
-+ // Safety: src safety invariant used here
- let word = *src;
- let second_word = *(src.add(1));
-+ // Safety: dst safety invariant passed down
- unpack_alu(word, second_word, dst);
- }
-
-+ // Safety: src points to four valid `usize`s, dst points to two valid `usize`s
- #[inline(always)]
- unsafe fn pack_latin1_stride_alu(src: *const usize, dst: *mut usize) {
-+ // Safety: src safety invariant used here
- let first = *src;
- let second = *(src.add(1));
- let third = *(src.add(2));
- let fourth = *(src.add(3));
-+ // Safety: dst safety invariant passed down
- pack_alu(first, second, third, fourth, dst);
- }
-
-+ // Safety: src points to two valid `usize`s, dst points to four valid `usize`s
- #[inline(always)]
- unsafe fn ascii_to_basic_latin_stride_alu(src: *const usize, dst: *mut usize) -> bool {
-+ // Safety: src safety invariant used here
- let word = *src;
- let second_word = *(src.add(1));
- // Check if the words contains non-ASCII
- if (word & ASCII_MASK) | (second_word & ASCII_MASK) != 0 {
- return false;
- }
-+ // Safety: dst safety invariant passed down
- unpack_alu(word, second_word, dst);
- true
- }
-
-+ // Safety: src points to four valid `usize`s, dst points to two valid `usize`s
- #[inline(always)]
- unsafe fn basic_latin_to_ascii_stride_alu(src: *const usize, dst: *mut usize) -> bool {
-+ // Safety: src safety invariant used here
- let first = *src;
- let second = *(src.add(1));
- let third = *(src.add(2));
- let fourth = *(src.add(3));
- if (first & BASIC_LATIN_MASK) | (second & BASIC_LATIN_MASK) | (third & BASIC_LATIN_MASK) | (fourth & BASIC_LATIN_MASK) != 0 {
- return false;
- }
-+ // Safety: dst safety invariant passed down
- pack_alu(first, second, third, fourth, dst);
- true
- }
-
-+ // Safety: src, dst both point to two valid `usize`s each
-+ // Safety-usable invariant: Will return byte index of first non-ascii byte.
- #[inline(always)]
- unsafe fn ascii_to_ascii_stride(src: *const usize, dst: *mut usize) -> Option<usize> {
-+ // Safety: src safety invariant used here
- let word = *src;
- let second_word = *(src.add(1));
-+ // Safety: dst safety invariant used here
- *dst = word;
- *(dst.add(1)) = second_word;
-+ // Relies on safety-usable invariant here
- find_non_ascii(word, second_word)
- }
-
- basic_latin_alu!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_alu);
- basic_latin_alu!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_alu);
- latin1_alu!(unpack_latin1, u8, u16, unpack_latin1_stride_alu);
- latin1_alu!(pack_latin1, u16, u8, pack_latin1_stride_alu);
-+ // Safety invariant upheld: ascii_to_ascii_stride will return byte index of first non-ascii if found
- ascii_alu!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride);
- }
- }
-
- pub fn ascii_valid_up_to(bytes: &[u8]) -> usize {
- match validate_ascii(bytes) {
- None => bytes.len(),
- Some((_, num_valid)) => num_valid,
-diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs
---- third_party/rust/encoding_rs/src/handles.rs
-+++ third_party/rust/encoding_rs/src/handles.rs
-@@ -29,17 +29,17 @@ use crate::simd_funcs::*;
- #[cfg(all(
- feature = "simd-accel",
- any(
- target_feature = "sse2",
- all(target_endian = "little", target_arch = "aarch64"),
- all(target_endian = "little", target_feature = "neon")
- )
- ))]
--use packed_simd::u16x8;
-+use core::simd::u16x8;
-
- use super::DecoderResult;
- use super::EncoderResult;
- use crate::ascii::*;
- use crate::utf_8::convert_utf8_to_utf16_up_to_invalid;
- use crate::utf_8::utf8_valid_up_to;
-
- pub enum Space<T> {
-@@ -85,84 +85,100 @@ impl Endian for LittleEndian {
- const OPPOSITE_ENDIAN: bool = false;
-
- #[cfg(target_endian = "big")]
- const OPPOSITE_ENDIAN: bool = true;
- }
-
- #[derive(Debug, Copy, Clone)]
- struct UnalignedU16Slice {
-+ // Safety invariant: ptr must be valid for reading 2*len bytes
- ptr: *const u8,
- len: usize,
- }
-
- impl UnalignedU16Slice {
-+ /// Safety: ptr must be valid for reading 2*len bytes
- #[inline(always)]
- pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
-+ // Safety: field invariant passed up to caller here
- UnalignedU16Slice { ptr, len }
- }
-
- #[inline(always)]
- pub fn trim_last(&mut self) {
- assert!(self.len > 0);
-+ // Safety: invariant upheld here: a slice is still valid with a shorter len
- self.len -= 1;
- }
-
- #[inline(always)]
- pub fn at(&self, i: usize) -> u16 {
- use core::mem::MaybeUninit;
-
- assert!(i < self.len);
- unsafe {
- let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
-+ // Safety: i is at most len - 1, which works here
- ::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
-+ // Safety: valid read above lets us do this
- u.assume_init()
- }
- }
-
- #[cfg(feature = "simd-accel")]
- #[inline(always)]
- pub fn simd_at(&self, i: usize) -> u16x8 {
-+ // Safety: i/len are on the scale of u16s, each one corresponds to 2 u8s
- assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
- let byte_index = i * 2;
-+ // Safety: load16_unaligned needs SIMD_STRIDE_SIZE=16 u8 elements to read,
-+ // or 16/2 = 8 u16 elements to read.
-+ // We have checked that we have at least that many above.
-+
- unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
- }
-
- #[inline(always)]
- pub fn len(&self) -> usize {
- self.len
- }
-
- #[inline(always)]
- pub fn tail(&self, from: usize) -> UnalignedU16Slice {
- // XXX the return value should be restricted not to
- // outlive self.
- assert!(from <= self.len);
-+ // Safety: This upholds the same invariant: `from` is in bounds and we're returning a shorter slice
- unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
- }
-
- #[cfg(feature = "simd-accel")]
- #[inline(always)]
- pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
- assert!(self.len <= other.len());
- let mut offset = 0;
-+ // Safety: SIMD_STRIDE_SIZE is measured in bytes, whereas len is in u16s. We check we can
-+ // munch SIMD_STRIDE_SIZE / 2 u16s which means we can write SIMD_STRIDE_SIZE u8s
- if SIMD_STRIDE_SIZE / 2 <= self.len {
- let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
- loop {
- let mut simd = self.simd_at(offset);
- if E::OPPOSITE_ENDIAN {
- simd = simd_byte_swap(simd);
- }
-+ // Safety: we have enough space on the other side to write this
- unsafe {
- store8_unaligned(other.as_mut_ptr().add(offset), simd);
- }
- if contains_surrogates(simd) {
- break;
- }
- offset += SIMD_STRIDE_SIZE / 2;
-+ // Safety: This ensures we still have space for writing SIMD_STRIDE_SIZE u8s
- if offset > len_minus_stride {
- break;
- }
- }
- }
- while offset < self.len {
- let unit = swap_if_opposite_endian::<E>(self.at(offset));
- other[offset] = unit;
-@@ -231,33 +247,37 @@ fn copy_unaligned_basic_latin_to_ascii<E
- #[cfg(feature = "simd-accel")]
- #[inline(always)]
- fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
- src: UnalignedU16Slice,
- dst: &mut [u8],
- ) -> CopyAsciiResult<usize, (u16, usize)> {
- let len = ::core::cmp::min(src.len(), dst.len());
- let mut offset = 0;
-+ // Safety: This check ensures we are able to read/write at least SIMD_STRIDE_SIZE elements
- if SIMD_STRIDE_SIZE <= len {
- let len_minus_stride = len - SIMD_STRIDE_SIZE;
- loop {
- let mut first = src.simd_at(offset);
- let mut second = src.simd_at(offset + (SIMD_STRIDE_SIZE / 2));
- if E::OPPOSITE_ENDIAN {
- first = simd_byte_swap(first);
- second = simd_byte_swap(second);
- }
- if !simd_is_basic_latin(first | second) {
- break;
- }
- let packed = simd_pack(first, second);
-+ // Safety: We are able to write SIMD_STRIDE_SIZE elements in this iteration
- unsafe {
- store16_unaligned(dst.as_mut_ptr().add(offset), packed);
- }
- offset += SIMD_STRIDE_SIZE;
-+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which ensures that we can write at least SIMD_STRIDE_SIZE elements
-+ // in the next iteration
- if offset > len_minus_stride {
- break;
- }
- }
- }
- copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(offset), &mut dst[offset..], offset)
- }
-
-@@ -632,94 +652,106 @@ impl<'a> Utf16Destination<'a> {
- #[inline(always)]
- fn write_astral(&mut self, astral: u32) {
- debug_assert!(astral > 0xFFFF);
- debug_assert!(astral <= 0x10_FFFF);
- self.write_code_unit((0xD7C0 + (astral >> 10)) as u16);
- self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
- }
- #[inline(always)]
-- pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
-+ fn write_surrogate_pair(&mut self, high: u16, low: u16) {
- self.write_code_unit(high);
- self.write_code_unit(low);
- }
- #[inline(always)]
- fn write_big5_combination(&mut self, combined: u16, combining: u16) {
- self.write_bmp_excl_ascii(combined);
- self.write_bmp_excl_ascii(combining);
- }
-+ // Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
- #[inline(always)]
- pub fn copy_ascii_from_check_space_bmp<'b>(
- &'b mut self,
- source: &mut ByteSource,
- ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> {
- let non_ascii_ret = {
- let src_remaining = &source.slice[source.pos..];
- let dst_remaining = &mut self.slice[self.pos..];
- let (pending, length) = if dst_remaining.len() < src_remaining.len() {
- (DecoderResult::OutputFull, dst_remaining.len())
- } else {
- (DecoderResult::InputEmpty, src_remaining.len())
- };
-+ // Safety: This function is documented as needing valid pointers for src/dest and len, which
-+ // is true since we've passed the minimum length of the two
- match unsafe {
- ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
- } {
- None => {
- source.pos += length;
- self.pos += length;
- return CopyAsciiResult::Stop((pending, source.pos, self.pos));
- }
-+ // Safety: the function is documented as returning bytes >=0x80 in the Some
- Some((non_ascii, consumed)) => {
- source.pos += consumed;
- self.pos += consumed;
- source.pos += 1; // +1 for non_ascii
-+ // Safety: non-ascii bubbled out here
- non_ascii
- }
- }
- };
-+ // Safety: non-ascii returned here
- CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
- }
-+ // Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
- #[inline(always)]
- pub fn copy_ascii_from_check_space_astral<'b>(
- &'b mut self,
- source: &mut ByteSource,
- ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> {
- let non_ascii_ret = {
- let dst_len = self.slice.len();
- let src_remaining = &source.slice[source.pos..];
- let dst_remaining = &mut self.slice[self.pos..];
- let (pending, length) = if dst_remaining.len() < src_remaining.len() {
- (DecoderResult::OutputFull, dst_remaining.len())
- } else {
- (DecoderResult::InputEmpty, src_remaining.len())
- };
-+ // Safety: This function is documented as needing valid pointers for src/dest and len, which
-+ // is true since we've passed the minimum length of the two
- match unsafe {
- ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
- } {
- None => {
- source.pos += length;
- self.pos += length;
- return CopyAsciiResult::Stop((pending, source.pos, self.pos));
- }
-+ // Safety: the function is documented as returning bytes >=0x80 in the Some
- Some((non_ascii, consumed)) => {
- source.pos += consumed;
- self.pos += consumed;
- if self.pos + 1 < dst_len {
- source.pos += 1; // +1 for non_ascii
-+ // Safety: non-ascii bubbled out here
- non_ascii
- } else {
- return CopyAsciiResult::Stop((
- DecoderResult::OutputFull,
- source.pos,
- self.pos,
- ));
- }
- }
- }
- };
-+ // Safety: non-ascii returned here
- CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
- }
- #[inline(always)]
- pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) {
- let src_remaining = &source.slice[source.pos..];
- let dst_remaining = &mut self.slice[self.pos..];
- let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining);
- source.pos += read;
-diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs
---- third_party/rust/encoding_rs/src/lib.rs
-+++ third_party/rust/encoding_rs/src/lib.rs
-@@ -684,37 +684,26 @@
- //! <tr><td>TIS-620</td><td>windows-874</td></tr>
- //! </tbody>
- //! </table>
- //!
- //! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes)
- //! for discussion about the UTF-16 family.
-
- #![no_std]
--#![cfg_attr(feature = "simd-accel", feature(core_intrinsics))]
-+#![cfg_attr(feature = "simd-accel", feature(core_intrinsics, portable_simd))]
-
- #[cfg(feature = "alloc")]
- #[cfg_attr(test, macro_use)]
- extern crate alloc;
-
- extern crate core;
- #[macro_use]
- extern crate cfg_if;
-
--#[cfg(all(
-- feature = "simd-accel",
-- any(
-- target_feature = "sse2",
-- all(target_endian = "little", target_arch = "aarch64"),
-- all(target_endian = "little", target_feature = "neon")
-- )
--))]
--#[macro_use(shuffle)]
--extern crate packed_simd;
--
- #[cfg(feature = "serde")]
- extern crate serde;
-
- #[cfg(all(test, feature = "serde"))]
- extern crate bincode;
- #[cfg(all(test, feature = "serde"))]
- #[macro_use]
- extern crate serde_derive;
-diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs
---- third_party/rust/encoding_rs/src/mem.rs
-+++ third_party/rust/encoding_rs/src/mem.rs
-@@ -111,16 +111,21 @@ macro_rules! by_unit_check_alu {
- until_alignment -= 1;
- }
- if accu >= $bound {
- return false;
- }
- }
- let len_minus_stride = len - ALU_ALIGNMENT / unit_size;
- if offset + (4 * (ALU_ALIGNMENT / unit_size)) <= len {
-+ // Safety: the above check lets us perform 4 consecutive reads of
-+ // length ALU_ALIGNMENT / unit_size. ALU_ALIGNMENT is the size of usize, and unit_size
-+ // is the size of the `src` pointer, so this is equal to performing four usize reads.
-+ //
-+ // This invariant is upheld on all loop iterations
- let len_minus_unroll = len - (4 * (ALU_ALIGNMENT / unit_size));
- loop {
- let unroll_accu = unsafe { *(src.add(offset) as *const usize) }
- | unsafe {
- *(src.add(offset + (ALU_ALIGNMENT / unit_size)) as *const usize)
- }
- | unsafe {
- *(src.add(offset + (2 * (ALU_ALIGNMENT / unit_size)))
-@@ -129,22 +134,24 @@ macro_rules! by_unit_check_alu {
- | unsafe {
- *(src.add(offset + (3 * (ALU_ALIGNMENT / unit_size)))
- as *const usize)
- };
- if unroll_accu & $mask != 0 {
- return false;
- }
- offset += 4 * (ALU_ALIGNMENT / unit_size);
-+ // Safety: this check lets us continue to perform the 4 reads earlier
- if offset > len_minus_unroll {
- break;
- }
- }
- }
- while offset <= len_minus_stride {
-+ // Safety: the above check lets us perform one usize read.
- accu |= unsafe { *(src.add(offset) as *const usize) };
- offset += ALU_ALIGNMENT / unit_size;
- }
- }
- }
- for &unit in &buffer[offset..] {
- accu |= unit as usize;
- }
-@@ -184,16 +191,21 @@ macro_rules! by_unit_check_simd {
- until_alignment -= 1;
- }
- if accu >= $bound {
- return false;
- }
- }
- let len_minus_stride = len - SIMD_STRIDE_SIZE / unit_size;
- if offset + (4 * (SIMD_STRIDE_SIZE / unit_size)) <= len {
-+ // Safety: the above check lets us perform 4 consecutive reads of
-+ // length SIMD_STRIDE_SIZE / unit_size. SIMD_STRIDE_SIZE is the size of $simd_ty, and unit_size
-+ // is the size of the `src` pointer, so this is equal to performing four $simd_ty reads.
-+ //
-+ // This invariant is upheld on all loop iterations
- let len_minus_unroll = len - (4 * (SIMD_STRIDE_SIZE / unit_size));
- loop {
- let unroll_accu = unsafe { *(src.add(offset) as *const $simd_ty) }
- | unsafe {
- *(src.add(offset + (SIMD_STRIDE_SIZE / unit_size))
- as *const $simd_ty)
- }
- | unsafe {
-@@ -203,23 +215,25 @@ macro_rules! by_unit_check_simd {
- | unsafe {
- *(src.add(offset + (3 * (SIMD_STRIDE_SIZE / unit_size)))
- as *const $simd_ty)
- };
- if !$func(unroll_accu) {
- return false;
- }
- offset += 4 * (SIMD_STRIDE_SIZE / unit_size);
-+ // Safety: this check lets us continue to perform the 4 reads earlier
- if offset > len_minus_unroll {
- break;
- }
- }
- }
- let mut simd_accu = $splat;
- while offset <= len_minus_stride {
-+ // Safety: the above check lets us perform one $simd_ty read.
- simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) };
- offset += SIMD_STRIDE_SIZE / unit_size;
- }
- if !$func(simd_accu) {
- return false;
- }
- }
- }
-@@ -229,18 +243,18 @@ macro_rules! by_unit_check_simd {
- accu < $bound
- }
- };
- }
-
- cfg_if! {
- if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
- use crate::simd_funcs::*;
-- use packed_simd::u8x16;
-- use packed_simd::u16x8;
-+ use core::simd::u8x16;
-+ use core::simd::u16x8;
-
- const SIMD_ALIGNMENT: usize = 16;
-
- const SIMD_ALIGNMENT_MASK: usize = 15;
-
- by_unit_check_simd!(is_ascii_impl, u8, u8x16::splat(0), u8x16, 0x80, simd_is_ascii);
- by_unit_check_simd!(is_basic_latin_impl, u16, u16x8::splat(0), u16x8, 0x80, simd_is_basic_latin);
- by_unit_check_simd!(is_utf16_latin1_impl, u16, u16x8::splat(0), u16x8, 0x100, simd_is_latin1);
-diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs
---- third_party/rust/encoding_rs/src/simd_funcs.rs
-+++ third_party/rust/encoding_rs/src/simd_funcs.rs
-@@ -2,65 +2,84 @@
- // file at the top-level directory of this distribution.
- //
- // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
- // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
- // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
- // option. This file may not be copied, modified, or distributed
- // except according to those terms.
-
--use packed_simd::u16x8;
--use packed_simd::u8x16;
--use packed_simd::IntoBits;
-+use any_all_workaround::all_mask16x8;
-+use any_all_workaround::all_mask8x16;
-+use any_all_workaround::any_mask16x8;
-+use any_all_workaround::any_mask8x16;
-+use core::simd::cmp::SimdPartialEq;
-+use core::simd::cmp::SimdPartialOrd;
-+use core::simd::mask16x8;
-+use core::simd::mask8x16;
-+use core::simd::simd_swizzle;
-+use core::simd::u16x8;
-+use core::simd::u8x16;
-+use core::simd::ToBytes;
-
- // TODO: Migrate unaligned access to stdlib code if/when the RFC
- // https://github.com/rust-lang/rfcs/pull/1725 is implemented.
-
-+/// Safety invariant: ptr must be valid for an unaligned read of 16 bytes
- #[inline(always)]
- pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 {
-- let mut simd = ::core::mem::uninitialized();
-- ::core::ptr::copy_nonoverlapping(ptr, &mut simd as *mut u8x16 as *mut u8, 16);
-- simd
-+ let mut simd = ::core::mem::MaybeUninit::<u8x16>::uninit();
-+ ::core::ptr::copy_nonoverlapping(ptr, simd.as_mut_ptr() as *mut u8, 16);
-+ // Safety: copied 16 bytes of initialized memory into this, it is now initialized
-+ simd.assume_init()
- }
-
-+/// Safety invariant: ptr must be valid for an aligned-for-u8x16 read of 16 bytes
- #[allow(dead_code)]
- #[inline(always)]
- pub unsafe fn load16_aligned(ptr: *const u8) -> u8x16 {
- *(ptr as *const u8x16)
- }
-
-+/// Safety invariant: ptr must be valid for an unaligned store of 16 bytes
- #[inline(always)]
- pub unsafe fn store16_unaligned(ptr: *mut u8, s: u8x16) {
- ::core::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16);
- }
-
-+/// Safety invariant: ptr must be valid for an aligned-for-u8x16 store of 16 bytes
- #[allow(dead_code)]
- #[inline(always)]
- pub unsafe fn store16_aligned(ptr: *mut u8, s: u8x16) {
- *(ptr as *mut u8x16) = s;
- }
-
-+/// Safety invariant: ptr must be valid for an unaligned read of 16 bytes
- #[inline(always)]
- pub unsafe fn load8_unaligned(ptr: *const u16) -> u16x8 {
-- let mut simd = ::core::mem::uninitialized();
-- ::core::ptr::copy_nonoverlapping(ptr as *const u8, &mut simd as *mut u16x8 as *mut u8, 16);
-- simd
-+ let mut simd = ::core::mem::MaybeUninit::<u16x8>::uninit();
-+ ::core::ptr::copy_nonoverlapping(ptr as *const u8, simd.as_mut_ptr() as *mut u8, 16);
-+ // Safety: copied 16 bytes of initialized memory into this, it is now initialized
-+ simd.assume_init()
- }
-
-+/// Safety invariant: ptr must be valid for an aligned-for-u16x8 read of 16 bytes
- #[allow(dead_code)]
- #[inline(always)]
- pub unsafe fn load8_aligned(ptr: *const u16) -> u16x8 {
- *(ptr as *const u16x8)
- }
-
-+/// Safety invariant: ptr must be valid for an unaligned store of 16 bytes
- #[inline(always)]
- pub unsafe fn store8_unaligned(ptr: *mut u16, s: u16x8) {
- ::core::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16);
- }
-
-+/// Safety invariant: ptr must be valid for an aligned-for-u16x8 store of 16 bytes
- #[allow(dead_code)]
- #[inline(always)]
- pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) {
- *(ptr as *mut u16x8) = s;
- }
-
- cfg_if! {
- if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] {
-@@ -95,234 +114,241 @@ cfg_if! {
- pub fn simd_byte_swap(s: u16x8) -> u16x8 {
- let left = s << 8;
- let right = s >> 8;
- left | right
- }
-
- #[inline(always)]
- pub fn to_u16_lanes(s: u8x16) -> u16x8 {
-- s.into_bits()
-+ u16x8::from_ne_bytes(s)
- }
-
- cfg_if! {
- if #[cfg(target_feature = "sse2")] {
-
- // Expose low-level mask instead of higher-level conclusion,
- // because the non-ASCII case would perform less well otherwise.
-+ // Safety-usable invariant: This returned value is whether each high bit is set
- #[inline(always)]
- pub fn mask_ascii(s: u8x16) -> i32 {
- unsafe {
-- _mm_movemask_epi8(s.into_bits())
-+ _mm_movemask_epi8(s.into())
- }
- }
-
- } else {
-
- }
- }
-
- cfg_if! {
- if #[cfg(target_feature = "sse2")] {
- #[inline(always)]
- pub fn simd_is_ascii(s: u8x16) -> bool {
- unsafe {
-- _mm_movemask_epi8(s.into_bits()) == 0
-+ // Safety: We have cfg()d the correct platform
-+ _mm_movemask_epi8(s.into()) == 0
- }
- }
- } else if #[cfg(target_arch = "aarch64")]{
- #[inline(always)]
- pub fn simd_is_ascii(s: u8x16) -> bool {
- unsafe {
-- vmaxvq_u8(s.into_bits()) < 0x80
-+ // Safety: We have cfg()d the correct platform
-+ vmaxvq_u8(s.into()) < 0x80
- }
- }
- } else {
- #[inline(always)]
- pub fn simd_is_ascii(s: u8x16) -> bool {
- // This optimizes better on ARM than
- // the lt formulation.
- let highest_ascii = u8x16::splat(0x7F);
-- !s.gt(highest_ascii).any()
-+ !any_mask8x16(s.simd_gt(highest_ascii))
- }
- }
- }
-
- cfg_if! {
- if #[cfg(target_feature = "sse2")] {
- #[inline(always)]
- pub fn simd_is_str_latin1(s: u8x16) -> bool {
- if simd_is_ascii(s) {
- return true;
- }
- let above_str_latin1 = u8x16::splat(0xC4);
-- s.lt(above_str_latin1).all()
-+ s.simd_lt(above_str_latin1).all()
- }
- } else if #[cfg(target_arch = "aarch64")]{
- #[inline(always)]
- pub fn simd_is_str_latin1(s: u8x16) -> bool {
- unsafe {
-- vmaxvq_u8(s.into_bits()) < 0xC4
-+ // Safety: We have cfg()d the correct platform
-+ vmaxvq_u8(s.into()) < 0xC4
- }
- }
- } else {
- #[inline(always)]
- pub fn simd_is_str_latin1(s: u8x16) -> bool {
- let above_str_latin1 = u8x16::splat(0xC4);
-- s.lt(above_str_latin1).all()
-+ all_mask8x16(s.simd_lt(above_str_latin1))
- }
- }
- }
-
- cfg_if! {
- if #[cfg(target_arch = "aarch64")]{
- #[inline(always)]
- pub fn simd_is_basic_latin(s: u16x8) -> bool {
- unsafe {
-- vmaxvq_u16(s.into_bits()) < 0x80
-+ // Safety: We have cfg()d the correct platform
-+ vmaxvq_u16(s.into()) < 0x80
- }
- }
-
- #[inline(always)]
- pub fn simd_is_latin1(s: u16x8) -> bool {
- unsafe {
-- vmaxvq_u16(s.into_bits()) < 0x100
-+ // Safety: We have cfg()d the correct platform
-+ vmaxvq_u16(s.into()) < 0x100
- }
- }
- } else {
- #[inline(always)]
- pub fn simd_is_basic_latin(s: u16x8) -> bool {
- let above_ascii = u16x8::splat(0x80);
-- s.lt(above_ascii).all()
-+ all_mask16x8(s.simd_lt(above_ascii))
- }
-
- #[inline(always)]
- pub fn simd_is_latin1(s: u16x8) -> bool {
- // For some reason, on SSE2 this formulation
- // seems faster in this case while the above
- // function is better the other way round...
- let highest_latin1 = u16x8::splat(0xFF);
-- !s.gt(highest_latin1).any()
-+ !any_mask16x8(s.simd_gt(highest_latin1))
- }
- }
- }
-
- #[inline(always)]
- pub fn contains_surrogates(s: u16x8) -> bool {
- let mask = u16x8::splat(0xF800);
- let surrogate_bits = u16x8::splat(0xD800);
-- (s & mask).eq(surrogate_bits).any()
-+ any_mask16x8((s & mask).simd_eq(surrogate_bits))
- }
-
- cfg_if! {
- if #[cfg(target_arch = "aarch64")]{
- macro_rules! aarch64_return_false_if_below_hebrew {
- ($s:ident) => ({
- unsafe {
-- if vmaxvq_u16($s.into_bits()) < 0x0590 {
-+ // Safety: We have cfg()d the correct platform
-+ if vmaxvq_u16($s.into()) < 0x0590 {
- return false;
- }
- }
- })
- }
-
- macro_rules! non_aarch64_return_false_if_all {
- ($s:ident) => ()
- }
- } else {
- macro_rules! aarch64_return_false_if_below_hebrew {
- ($s:ident) => ()
- }
-
- macro_rules! non_aarch64_return_false_if_all {
- ($s:ident) => ({
-- if $s.all() {
-+ if all_mask16x8($s) {
- return false;
- }
- })
- }
- }
- }
-
- macro_rules! in_range16x8 {
- ($s:ident, $start:expr, $end:expr) => {{
- // SIMD sub is wrapping
-- ($s - u16x8::splat($start)).lt(u16x8::splat($end - $start))
-+ ($s - u16x8::splat($start)).simd_lt(u16x8::splat($end - $start))
- }};
- }
-
- #[inline(always)]
- pub fn is_u16x8_bidi(s: u16x8) -> bool {
- // We try to first quickly refute the RTLness of the vector. If that
- // fails, we do the real RTL check, so in that case we end up wasting
- // the work for the up-front quick checks. Even the quick-check is
- // two-fold in order to return `false` ASAP if everything is below
- // Hebrew.
-
- aarch64_return_false_if_below_hebrew!(s);
-
-- let below_hebrew = s.lt(u16x8::splat(0x0590));
-+ let below_hebrew = s.simd_lt(u16x8::splat(0x0590));
-
- non_aarch64_return_false_if_all!(below_hebrew);
-
-- if (below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802)).all() {
-+ if all_mask16x8(
-+ below_hebrew | in_range16x8!(s, 0x0900, 0x200F) | in_range16x8!(s, 0x2068, 0xD802),
-+ ) {
- return false;
- }
-
- // Quick refutation failed. Let's do the full check.
-
-- (in_range16x8!(s, 0x0590, 0x0900)
-- | in_range16x8!(s, 0xFB1D, 0xFE00)
-- | in_range16x8!(s, 0xFE70, 0xFEFF)
-- | in_range16x8!(s, 0xD802, 0xD804)
-- | in_range16x8!(s, 0xD83A, 0xD83C)
-- | s.eq(u16x8::splat(0x200F))
-- | s.eq(u16x8::splat(0x202B))
-- | s.eq(u16x8::splat(0x202E))
-- | s.eq(u16x8::splat(0x2067)))
-- .any()
-+ any_mask16x8(
-+ (in_range16x8!(s, 0x0590, 0x0900)
-+ | in_range16x8!(s, 0xFB1D, 0xFE00)
-+ | in_range16x8!(s, 0xFE70, 0xFEFF)
-+ | in_range16x8!(s, 0xD802, 0xD804)
-+ | in_range16x8!(s, 0xD83A, 0xD83C)
-+ | s.simd_eq(u16x8::splat(0x200F))
-+ | s.simd_eq(u16x8::splat(0x202B))
-+ | s.simd_eq(u16x8::splat(0x202E))
-+ | s.simd_eq(u16x8::splat(0x2067))),
-+ )
- }
-
- #[inline(always)]
- pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) {
-- unsafe {
-- let first: u8x16 = shuffle!(
-- s,
-- u8x16::splat(0),
-- [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
-- );
-- let second: u8x16 = shuffle!(
-- s,
-- u8x16::splat(0),
-- [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
-- );
-- (first.into_bits(), second.into_bits())
-- }
-+ let first: u8x16 = simd_swizzle!(
-+ s,
-+ u8x16::splat(0),
-+ [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
-+ );
-+ let second: u8x16 = simd_swizzle!(
-+ s,
-+ u8x16::splat(0),
-+ [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
-+ );
-+ (u16x8::from_ne_bytes(first), u16x8::from_ne_bytes(second))
- }
-
- cfg_if! {
- if #[cfg(target_feature = "sse2")] {
- #[inline(always)]
- pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
- unsafe {
-- _mm_packus_epi16(a.into_bits(), b.into_bits()).into_bits()
-+ // Safety: We have cfg()d the correct platform
-+ _mm_packus_epi16(a.into(), b.into()).into()
- }
- }
- } else {
- #[inline(always)]
- pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
-- unsafe {
-- let first: u8x16 = a.into_bits();
-- let second: u8x16 = b.into_bits();
-- shuffle!(
-- first,
-- second,
-- [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-- )
-- }
-+ let first: u8x16 = a.to_ne_bytes();
-+ let second: u8x16 = b.to_ne_bytes();
-+ simd_swizzle!(
-+ first,
-+ second,
-+ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-+ )
- }
- }
- }
-
- #[cfg(test)]
- mod tests {
- use super::*;
- use alloc::vec::Vec;
-diff --git a/third_party/rust/encoding_rs/src/single_byte.rs b/third_party/rust/encoding_rs/src/single_byte.rs
---- third_party/rust/encoding_rs/src/single_byte.rs
-+++ third_party/rust/encoding_rs/src/single_byte.rs
-@@ -48,16 +48,19 @@ impl SingleByteDecoder {
- CopyAsciiResult::GoOn((mut non_ascii, mut handle)) => 'middle: loop {
- // Start non-boilerplate
- //
- // Since the non-ASCIIness of `non_ascii` is hidden from
- // the optimizer, it can't figure out that it's OK to
- // statically omit the bound check when accessing
- // `[u16; 128]` with an index
- // `non_ascii as usize - 0x80usize`.
-+ //
-+ // Safety: `non_ascii` is a u8 byte >=0x80, from the invariants
-+ // on Utf8Destination::copy_ascii_from_check_space_bmp()
- let mapped =
- unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
- // let mapped = self.table[non_ascii as usize - 0x80usize];
- if mapped == 0u16 {
- return (
- DecoderResult::Malformed(1, 0),
- source.consumed(),
- handle.written(),
-@@ -146,82 +149,103 @@ impl SingleByteDecoder {
- dst: &mut [u16],
- _last: bool,
- ) -> (DecoderResult, usize, usize) {
- let (pending, length) = if dst.len() < src.len() {
- (DecoderResult::OutputFull, dst.len())
- } else {
- (DecoderResult::InputEmpty, src.len())
- };
-+ // Safety invariant: converted <= length. Quite often we have `converted < length`
-+ // which will be separately marked.
- let mut converted = 0usize;
- 'outermost: loop {
- match unsafe {
-+ // Safety: length is the minimum length, `src/dst + x` will always be valid for reads/writes of `len - x`
- ascii_to_basic_latin(
- src.as_ptr().add(converted),
- dst.as_mut_ptr().add(converted),
- length - converted,
- )
- } {
- None => {
- return (pending, length, length);
- }
- Some((mut non_ascii, consumed)) => {
-+ // Safety invariant: `converted <= length` upheld, since this can only consume
-+ // up to `length - converted` bytes.
-+ //
-+ // Furthermore, in this context,
-+ // we can assume `converted < length` since this branch is only ever hit when
-+ // ascii_to_basic_latin fails to consume the entire slice
- converted += consumed;
- 'middle: loop {
- // `converted` doesn't count the reading of `non_ascii` yet.
- // Since the non-ASCIIness of `non_ascii` is hidden from
- // the optimizer, it can't figure out that it's OK to
- // statically omit the bound check when accessing
- // `[u16; 128]` with an index
- // `non_ascii as usize - 0x80usize`.
-+ //
-+ // Safety: We can rely on `non_ascii` being between `0x80` and `0xFF` due to
-+ // the invariants of `ascii_to_basic_latin()`, and our table has enough space for that.
- let mapped =
- unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
- // let mapped = self.table[non_ascii as usize - 0x80usize];
- if mapped == 0u16 {
- return (
- DecoderResult::Malformed(1, 0),
- converted + 1, // +1 `for non_ascii`
- converted,
- );
- }
- unsafe {
-- // The bound check has already been performed
-+ // Safety: As mentioned above, `converted < length`
- *(dst.get_unchecked_mut(converted)) = mapped;
- }
-+ // Safety: `converted <= length` upheld, since `converted < length` before this
- converted += 1;
- // Next, handle ASCII punctuation and non-ASCII without
- // going back to ASCII acceleration. Non-ASCII scripts
- // use ASCII punctuation, so this avoid going to
- // acceleration just for punctuation/space and then
- // failing. This is a significant boost to non-ASCII
- // scripts.
- // TODO: Split out Latin converters without this part
- // this stuff makes Latin script-conversion slower.
- if converted == length {
- return (pending, length, length);
- }
-+ // Safety: We are back to `converted < length` because of the == above
-+ // and can perform this check.
- let mut b = unsafe { *(src.get_unchecked(converted)) };
-+ // Safety: `converted < length` is upheld for this loop
- 'innermost: loop {
- if b > 127 {
- non_ascii = b;
- continue 'middle;
- }
- // Testing on Haswell says that we should write the
- // byte unconditionally instead of trying to unread it
- // to make it part of the next SIMD stride.
- unsafe {
-+ // Safety: `converted < length` is true for this loop
- *(dst.get_unchecked_mut(converted)) = u16::from(b);
- }
-+ // Safety: We are now at `converted <= length`. We should *not* `continue`
-+ // the loop without reverifying
- converted += 1;
- if b < 60 {
- // We've got punctuation
- if converted == length {
- return (pending, length, length);
- }
-+ // Safety: we're back to `converted < length` because of the == above
- b = unsafe { *(src.get_unchecked(converted)) };
-+ // Safety: The loop continues as `converted < length`
- continue 'innermost;
- }
- // We've got markup or ASCII text
- continue 'outermost;
- }
- }
- }
- }
-@@ -229,16 +253,18 @@ impl SingleByteDecoder {
- }
-
- pub fn latin1_byte_compatible_up_to(&self, buffer: &[u8]) -> usize {
- let mut bytes = buffer;
- let mut total = 0;
- loop {
- if let Some((non_ascii, offset)) = validate_ascii(bytes) {
- total += offset;
-+ // Safety: We can rely on `non_ascii` being between `0x80` and `0xFF` due to
-+ // the invariants of `validate_ascii()`, and our table has enough space for that.
- let mapped = unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) };
- if mapped != u16::from(non_ascii) {
- return total;
- }
- total += 1;
- bytes = &bytes[offset + 1..];
- } else {
- return total;
-@@ -379,64 +405,89 @@ impl SingleByteEncoder {
- dst: &mut [u8],
- _last: bool,
- ) -> (EncoderResult, usize, usize) {
- let (pending, length) = if dst.len() < src.len() {
- (EncoderResult::OutputFull, dst.len())
- } else {
- (EncoderResult::InputEmpty, src.len())
- };
-+ // Safety invariant: converted <= length. Quite often we have `converted < length`
-+ // which will be separately marked.
- let mut converted = 0usize;
- 'outermost: loop {
- match unsafe {
-+ // Safety: length is the minimum length, `src/dst + x` will always be valid for reads/writes of `len - x`
- basic_latin_to_ascii(
- src.as_ptr().add(converted),
- dst.as_mut_ptr().add(converted),
- length - converted,
- )
- } {
- None => {
- return (pending, length, length);
- }
- Some((mut non_ascii, consumed)) => {
-+ // Safety invariant: `converted <= length` upheld, since this can only consume
-+ // up to `length - converted` bytes.
-+ //
-+ // Furthermore, in this context,
-+ // we can assume `converted < length` since this branch is only ever hit when
-+ // basic_latin_to_ascii fails to consume the entire slice
- converted += consumed;
- 'middle: loop {
- // `converted` doesn't count the reading of `non_ascii` yet.
- match self.encode_u16(non_ascii) {
- Some(byte) => {
- unsafe {
-+ // Safety: we're allowed this access since `converted < length`
- *(dst.get_unchecked_mut(converted)) = byte;
- }
- converted += 1;
-+ // `converted <= length` now
- }
- None => {
- // At this point, we need to know if we
- // have a surrogate.
- let high_bits = non_ascii & 0xFC00u16;
- if high_bits == 0xD800u16 {
- // high surrogate
- if converted + 1 == length {
- // End of buffer. This surrogate is unpaired.
- return (
- EncoderResult::Unmappable('\u{FFFD}'),
- converted + 1, // +1 `for non_ascii`
- converted,
- );
- }
-+ // Safety: converted < length from outside the match, and `converted + 1 != length`,
-+ // so `converted + 1 < length` as well. We're in bounds
- let second =
- u32::from(unsafe { *src.get_unchecked(converted + 1) });
- if second & 0xFC00u32 != 0xDC00u32 {
- return (
- EncoderResult::Unmappable('\u{FFFD}'),
- converted + 1, // +1 `for non_ascii`
- converted,
- );
- }
- // The next code unit is a low surrogate.
- let astral: char = unsafe {
-+ // Safety: We can rely on non_ascii being 0xD800-0xDBFF since the high bits are 0xD800
-+ // Then, (non_ascii << 10 - 0xD800 << 10) becomes between (0 to 0x3FF) << 10, which is between
-+ // 0x0 to 0xFFC00. Adding the 0x10000 gives a range of 0x10000 to 0x10FC00. Subtracting the 0xDC00
-+ // gives 0x2400 to 0x102000
-+ // The second term is between 0xDC00 and 0xDFFF from the check above. This gives a maximum
-+ // possible range of (0x2400 + 0xDC00) to (0x102000 + 0xDFFF) which is 0x10000 to 0x10FFFF.
-+ // This is in range.
-+ //
-+ // From a Unicode principles perspective this can also be verified as we have checked that `non_ascii` is a high surrogate
-+ // (0xD800..=0xDBFF), and that `second` is a low surrogate (`0xDC00..=0xDFFF`), and we are applying the reverse of the UTF-16 transformation
-+ // algorithm <https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF>, by applying the high surrogate - 0xD800 to the
-+ // high ten bits, and the low surrogate - 0xDC00 to the low ten bits, and then adding 0x10000
- ::core::char::from_u32_unchecked(
- (u32::from(non_ascii) << 10) + second
- - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32),
- )
- };
- return (
- EncoderResult::Unmappable(astral),
- converted + 2, // +2 `for non_ascii` and `second`
-@@ -451,52 +502,63 @@ impl SingleByteEncoder {
- converted,
- );
- }
- return (
- EncoderResult::unmappable_from_bmp(non_ascii),
- converted + 1, // +1 `for non_ascii`
- converted,
- );
-+ // Safety: This branch diverges, so no need to uphold invariants on `converted`
- }
- }
- // Next, handle ASCII punctuation and non-ASCII without
- // going back to ASCII acceleration. Non-ASCII scripts
- // use ASCII punctuation, so this avoid going to
- // acceleration just for punctuation/space and then
- // failing. This is a significant boost to non-ASCII
- // scripts.
- // TODO: Split out Latin converters without this part
- // this stuff makes Latin script-conversion slower.
- if converted == length {
- return (pending, length, length);
- }
-+ // Safety: we're back to `converted < length` due to the == above and can perform
-+ // the unchecked read
- let mut unit = unsafe { *(src.get_unchecked(converted)) };
- 'innermost: loop {
-+ // Safety: This loop always begins with `converted < length`, see
-+ // the invariant outside and the comment on the continue below
- if unit > 127 {
- non_ascii = unit;
- continue 'middle;
- }
- // Testing on Haswell says that we should write the
- // byte unconditionally instead of trying to unread it
- // to make it part of the next SIMD stride.
- unsafe {
-+ // Safety: Can rely on converted < length
- *(dst.get_unchecked_mut(converted)) = unit as u8;
- }
- converted += 1;
-+ // `converted <= length` here
- if unit < 60 {
- // We've got punctuation
- if converted == length {
- return (pending, length, length);
- }
-+ // Safety: `converted < length` due to the == above. The read is safe.
- unit = unsafe { *(src.get_unchecked(converted)) };
-+ // Safety: This only happens if `converted < length`, maintaining it
- continue 'innermost;
- }
- // We've got markup or ASCII text
- continue 'outermost;
-+ // Safety: All other routes to here diverge so the continue is the only
-+ // way to run the innermost loop.
- }
- }
- }
- }
- }
- }
- }
-
-diff --git a/third_party/rust/encoding_rs/src/x_user_defined.rs b/third_party/rust/encoding_rs/src/x_user_defined.rs
---- third_party/rust/encoding_rs/src/x_user_defined.rs
-+++ third_party/rust/encoding_rs/src/x_user_defined.rs
-@@ -9,22 +9,23 @@
-
- use super::*;
- use crate::handles::*;
- use crate::variant::*;
-
- cfg_if! {
- if #[cfg(feature = "simd-accel")] {
- use simd_funcs::*;
-- use packed_simd::u16x8;
-+ use core::simd::u16x8;
-+ use core::simd::cmp::SimdPartialOrd;
-
- #[inline(always)]
- fn shift_upper(unpacked: u16x8) -> u16x8 {
- let highest_ascii = u16x8::splat(0x7F);
-- unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
-+ unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
- } else {
- }
- }
-
- pub struct UserDefinedDecoder;
-
- impl UserDefinedDecoder {
- pub fn new() -> VariantDecoder {
-@@ -111,20 +112,25 @@ impl UserDefinedDecoder {
- } else {
- (DecoderResult::InputEmpty, src.len())
- };
- // Not bothering with alignment
- let tail_start = length & !0xF;
- let simd_iterations = length >> 4;
- let src_ptr = src.as_ptr();
- let dst_ptr = dst.as_mut_ptr();
-+ // Safety: This is `for i in 0..length / 16`
- for i in 0..simd_iterations {
-+ // Safety: This is in bounds: length is the minimum valid length for both src/dst
-+ // and i ranges to length/16, so `i * 16 + 16 <= length` always holds and we can do
-+ // a 16 byte read
- let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
- let (first, second) = simd_unpack(input);
- unsafe {
-+ // Safety: same as above, but this is two consecutive 8-lane (`u16x8`) writes
- store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
- store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
- }
- }
- let src_tail = &src[tail_start..length];
- let dst_tail = &mut dst[tail_start..length];
- src_tail
- .iter()