about | summary | refs | log | tree | commit | diff
path: root/src/liblzma/check
diff options
context:
space:
mode:
Diffstat (limited to 'src/liblzma/check')
-rw-r--r-- src/liblzma/check/crc32_arm64.h | 25
-rw-r--r-- src/liblzma/check/crc32_fast.c | 47
-rw-r--r-- src/liblzma/check/crc32_table.c | 10
-rw-r--r-- src/liblzma/check/crc32_tablegen.c | 2
-rw-r--r-- src/liblzma/check/crc64_fast.c | 22
-rw-r--r-- src/liblzma/check/crc64_table.c | 4
-rw-r--r-- src/liblzma/check/crc64_tablegen.c | 2
-rw-r--r-- src/liblzma/check/crc_common.h | 14
-rw-r--r-- src/liblzma/check/crc_x86_clmul.h | 11
9 files changed, 37 insertions, 100 deletions
diff --git a/src/liblzma/check/crc32_arm64.h b/src/liblzma/check/crc32_arm64.h
index 6cdb5dab32e6..39c1c63ec0ec 100644
--- a/src/liblzma/check/crc32_arm64.h
+++ b/src/liblzma/check/crc32_arm64.h
@@ -11,7 +11,6 @@
//
///////////////////////////////////////////////////////////////////////////////
-
#ifndef LZMA_CRC32_ARM64_H
#define LZMA_CRC32_ARM64_H
@@ -21,6 +20,8 @@
# include <arm_acle.h>
#endif
+// If both versions are going to be built, we need runtime detection
+// to check if the instructions are supported.
#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
# if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
# include <sys/auxv.h>
@@ -36,8 +37,7 @@
//
// NOTE: Build systems check for this too, keep them in sync with this.
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)
-# define crc_attr_target \
- __attribute__((__target__("+crc")))
+# define crc_attr_target __attribute__((__target__("+crc")))
#else
# define crc_attr_target
#endif
@@ -51,7 +51,7 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
// Align the input buffer because this was shown to be
// significantly faster than unaligned accesses.
- const size_t align_amount = my_min(size, (8 - (uintptr_t)buf) & 7);
+ const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7);
for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
crc = __crc32b(crc, *buf);
@@ -62,7 +62,7 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
// ignoring the least significant three bits of size to ensure
// we do not process past the bounds of the buffer. This guarantees
// that limit is a multiple of 8 and is strictly less than size.
- for (const uint8_t *limit = buf + (size & ~((size_t)7));
+ for (const uint8_t *limit = buf + (size & ~(size_t)7);
buf < limit; buf += 8)
crc = __crc32d(crc, aligned_read64le(buf));
@@ -84,8 +84,10 @@ is_arch_extension_supported(void)
#elif defined(HAVE_ELF_AUX_INFO)
unsigned long feature_flags;
- elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags));
- return feature_flags & HWCAP_CRC32 != 0;
+ if (elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)) != 0)
+ return false;
+
+ return (feature_flags & HWCAP_CRC32) != 0;
#elif defined(_WIN32)
return IsProcessorFeaturePresent(
@@ -98,11 +100,12 @@ is_arch_extension_supported(void)
// The sysctlbyname() function requires a string identifier for the
// CPU feature it tests. The Apple documentation lists the string
// "hw.optional.armv8_crc32", which can be found here:
- // (https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619)
- int err = sysctlbyname("hw.optional.armv8_crc32", &has_crc32,
- &size, NULL, 0);
+ // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619
+ if (sysctlbyname("hw.optional.armv8_crc32", &has_crc32,
+ &size, NULL, 0) != 0)
+ return false;
- return !err && has_crc32;
+ return has_crc32;
#else
// If a runtime detection method cannot be found, then this must
diff --git a/src/liblzma/check/crc32_fast.c b/src/liblzma/check/crc32_fast.c
index 5e26914a4d1b..16dbb7467513 100644
--- a/src/liblzma/check/crc32_fast.c
+++ b/src/liblzma/check/crc32_fast.c
@@ -34,7 +34,7 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
crc = ~crc;
#ifdef WORDS_BIGENDIAN
- crc = bswap32(crc);
+ crc = byteswap32(crc);
#endif
if (size > 8) {
@@ -80,7 +80,7 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);
#ifdef WORDS_BIGENDIAN
- crc = bswap32(crc);
+ crc = byteswap32(crc);
#endif
return ~crc;
@@ -97,24 +97,14 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
// If both the generic and arch-optimized implementations are built, then
// the function to use is selected at runtime because the system running
// the binary might not have the arch-specific instruction set extension(s)
-// available. The three dispatch methods in order of priority:
+// available. The dispatch methods in order of priority:
//
-// 1. Indirect function (ifunc). This method is slightly more efficient
-// than the constructor method because it will change the entry in the
-// Procedure Linkage Table (PLT) for the function either at load time or
-// at the first call. This avoids having to call the function through a
-// function pointer and will treat the function call like a regular call
-// through the PLT. ifuncs are created by using
-// __attribute__((__ifunc__("resolver"))) on a function which has no
-// body. The "resolver" is the name of the function that chooses at
-// runtime which implementation to use.
-//
-// 2. Constructor. This method uses __attribute__((__constructor__)) to
+// 1. Constructor. This method uses __attribute__((__constructor__)) to
// set crc32_func at load time. This avoids extra computation (and any
// unlikely threading bugs) on the first call to lzma_crc32() to decide
// which implementation should be used.
//
-// 3. First Call Resolution. On the very first call to lzma_crc32(), the
+// 2. First Call Resolution. On the very first call to lzma_crc32(), the
// call will be directed to crc32_dispatch() instead. This will set the
// appropriate implementation function and will not be called again.
// This method does not use any kind of locking but is safe because if
@@ -124,17 +114,7 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
typedef uint32_t (*crc32_func_type)(
const uint8_t *buf, size_t size, uint32_t crc);
-// Clang 16.0.0 and older has a bug where it marks the ifunc resolver
-// function as unused since it is static and never used outside of
-// __attribute__((__ifunc__())).
-#if defined(CRC_USE_IFUNC) && defined(__clang__)
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-
-// This resolver is shared between all three dispatch methods. It serves as
-// the ifunc resolver if ifunc is supported, otherwise it is called as a
-// regular function by the constructor or first call resolution methods.
+// This resolver is shared between all dispatch methods.
static crc32_func_type
crc32_resolve(void)
{
@@ -142,11 +122,6 @@ crc32_resolve(void)
? &crc32_arch_optimized : &crc32_generic;
}
-#if defined(CRC_USE_IFUNC) && defined(__clang__)
-# pragma GCC diagnostic pop
-#endif
-
-#ifndef CRC_USE_IFUNC
#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
// Constructor method.
@@ -171,8 +146,7 @@ crc32_set_func(void)
static uint32_t
crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc)
{
- // When __attribute__((__ifunc__(...))) and
- // __attribute__((__constructor__)) isn't supported, set the
+ // When __attribute__((__constructor__)) isn't supported, set the
// function pointer without any locking. If multiple threads run
// the detection code in parallel, they will all end up setting
// the pointer to the same value. This avoids the use of
@@ -184,14 +158,8 @@ crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc)
#endif
#endif
-#endif
-#ifdef CRC_USE_IFUNC
-extern LZMA_API(uint32_t)
-lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
- __attribute__((__ifunc__("crc32_resolve")));
-#else
extern LZMA_API(uint32_t)
lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
@@ -234,4 +202,3 @@ lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
return crc32_generic(buf, size, crc);
#endif
}
-#endif
diff --git a/src/liblzma/check/crc32_table.c b/src/liblzma/check/crc32_table.c
index fb1b6585422a..c141cefe5a40 100644
--- a/src/liblzma/check/crc32_table.c
+++ b/src/liblzma/check/crc32_table.c
@@ -17,18 +17,16 @@
#if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \
&& defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6))
-# define X86_CLMUL_NO_TABLE 1
-#endif
+# define NO_CRC32_TABLE
-#if defined(HAVE_ARM64_CRC32) \
+#elif defined(HAVE_ARM64_CRC32) \
&& !defined(WORDS_BIGENDIAN) \
&& defined(__ARM_FEATURE_CRC32)
-# define ARM64_CRC32_NO_TABLE 1
+# define NO_CRC32_TABLE
#endif
-#if !defined(HAVE_ENCODERS) && (defined(X86_CLMUL_NO_TABLE) \
- || defined(ARM64_CRC32_NO_TABLE_))
+#if !defined(HAVE_ENCODERS) && defined(NO_CRC32_TABLE)
// No table needed. Use a typedef to avoid an empty translation unit.
typedef void lzma_crc32_dummy;
diff --git a/src/liblzma/check/crc32_tablegen.c b/src/liblzma/check/crc32_tablegen.c
index 01047d3eca47..b8cf459f8e76 100644
--- a/src/liblzma/check/crc32_tablegen.c
+++ b/src/liblzma/check/crc32_tablegen.c
@@ -43,7 +43,7 @@ init_crc32_table(void)
#ifdef WORDS_BIGENDIAN
for (size_t s = 0; s < 8; ++s)
for (size_t b = 0; b < 256; ++b)
- crc32_table[s][b] = bswap32(crc32_table[s][b]);
+ crc32_table[s][b] = byteswap32(crc32_table[s][b]);
#endif
return;
diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c
index f29fe3d3c5e6..0ce83fe4ad36 100644
--- a/src/liblzma/check/crc64_fast.c
+++ b/src/liblzma/check/crc64_fast.c
@@ -39,7 +39,7 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
crc = ~crc;
#ifdef WORDS_BIGENDIAN
- crc = bswap64(crc);
+ crc = byteswap64(crc);
#endif
if (size > 4) {
@@ -73,7 +73,7 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc);
#ifdef WORDS_BIGENDIAN
- crc = bswap64(crc);
+ crc = byteswap64(crc);
#endif
return ~crc;
@@ -93,11 +93,6 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
typedef uint64_t (*crc64_func_type)(
const uint8_t *buf, size_t size, uint64_t crc);
-#if defined(CRC_USE_IFUNC) && defined(__clang__)
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-
static crc64_func_type
crc64_resolve(void)
{
@@ -105,12 +100,6 @@ crc64_resolve(void)
? &crc64_arch_optimized : &crc64_generic;
}
-#if defined(CRC_USE_IFUNC) && defined(__clang__)
-# pragma GCC diagnostic pop
-#endif
-
-#ifndef CRC_USE_IFUNC
-
#ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
# define CRC64_SET_FUNC_ATTR __attribute__((__constructor__))
static crc64_func_type crc64_func;
@@ -139,14 +128,8 @@ crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc)
}
#endif
#endif
-#endif
-#ifdef CRC_USE_IFUNC
-extern LZMA_API(uint64_t)
-lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
- __attribute__((__ifunc__("crc64_resolve")));
-#else
extern LZMA_API(uint64_t)
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
{
@@ -171,4 +154,3 @@ lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
return crc64_generic(buf, size, crc);
#endif
}
-#endif
diff --git a/src/liblzma/check/crc64_table.c b/src/liblzma/check/crc64_table.c
index 6dee387a1fcf..78e427597ce6 100644
--- a/src/liblzma/check/crc64_table.c
+++ b/src/liblzma/check/crc64_table.c
@@ -17,11 +17,11 @@
#if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \
&& defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6))
-# define X86_CLMUL_NO_TABLE 1
+# define NO_CRC64_TABLE
#endif
-#ifdef X86_CLMUL_NO_TABLE
+#ifdef NO_CRC64_TABLE
// No table needed. Use a typedef to avoid an empty translation unit.
typedef void lzma_crc64_dummy;
diff --git a/src/liblzma/check/crc64_tablegen.c b/src/liblzma/check/crc64_tablegen.c
index af93e011ca21..2035127a1123 100644
--- a/src/liblzma/check/crc64_tablegen.c
+++ b/src/liblzma/check/crc64_tablegen.c
@@ -42,7 +42,7 @@ init_crc64_table(void)
#ifdef WORDS_BIGENDIAN
for (size_t s = 0; s < 4; ++s)
for (size_t b = 0; b < 256; ++b)
- crc64_table[s][b] = bswap64(crc64_table[s][b]);
+ crc64_table[s][b] = byteswap64(crc64_table[s][b]);
#endif
return;
diff --git a/src/liblzma/check/crc_common.h b/src/liblzma/check/crc_common.h
index 856665db79a8..63a7b5cefebf 100644
--- a/src/liblzma/check/crc_common.h
+++ b/src/liblzma/check/crc_common.h
@@ -67,8 +67,6 @@
#undef CRC32_ARM64
#undef CRC64_ARM64_CLMUL
-#undef CRC_USE_IFUNC
-
#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS
// ARM64 CRC32 instruction is only useful for CRC32. Currently, only
@@ -76,7 +74,7 @@
// endian machine.
//
// NOTE: Keep this and the next check in sync with the macro
-// ARM64_CRC32_NO_TABLE in crc32_table.c
+// NO_CRC32_TABLE in crc32_table.c
#if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN)
// Allow ARM64 CRC32 instruction without a runtime check if
// __ARM_FEATURE_CRC32 is defined. GCC and Clang only define this if the
@@ -96,7 +94,8 @@
// generic version can be omitted. Note that this doesn't work with MSVC
// as I don't know how to detect the features here.
//
-// NOTE: Keep this in sync with the CLMUL_NO_TABLE macro in crc32_table.c.
+// NOTE: Keep this in sync with the NO_CRC32_TABLE macro in crc32_table.c
+// and NO_CRC64_TABLE in crc64_table.c.
# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6)
# define CRC32_ARCH_OPTIMIZED 1
@@ -109,9 +108,6 @@
# define CRC64_ARCH_OPTIMIZED 1
# define CRC_X86_CLMUL 1
-# ifdef HAVE_FUNC_ATTRIBUTE_IFUNC
-# define CRC_USE_IFUNC 1
-# endif
/*
// The generic code is much faster with 1-8-byte inputs and
// has similar performance up to 16 bytes at least in
@@ -121,9 +117,7 @@
// for bigger inputs. It saves a little in code size since
// the special cases for 0-16-byte inputs will be omitted
// from the CLMUL code.
-# ifndef CRC_USE_IFUNC
-# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
-# endif
+# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
*/
# endif
#endif
diff --git a/src/liblzma/check/crc_x86_clmul.h b/src/liblzma/check/crc_x86_clmul.h
index ae66ca9f8c71..f1254ece18ed 100644
--- a/src/liblzma/check/crc_x86_clmul.h
+++ b/src/liblzma/check/crc_x86_clmul.h
@@ -385,15 +385,8 @@ crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc)
#endif // BUILDING_CRC64_CLMUL
-// is_arch_extension_supported() must be inlined in this header file because
-// the ifunc resolver function may not support calling a function in another
-// translation unit. Depending on compiler-toolchain and flags, a call to
-// a function defined in another translation unit could result in a
-// reference to the PLT, which is unsafe to do in an ifunc resolver. The
-// ifunc resolver runs very early when loading a shared library, so the PLT
-// entries may not be setup at that time. Inlining this function duplicates
-// the function body in crc32_resolve() and crc64_resolve(), but this is
-// acceptable because the function results in very few instructions.
+// Inlining this function duplicates the function body in crc32_resolve() and
+// crc64_resolve(), but this is acceptable because this is a tiny function.
static inline bool
is_arch_extension_supported(void)
{