diff options
Diffstat (limited to 'src/liblzma/check')
-rw-r--r-- | src/liblzma/check/crc32_arm64.h | 25 |
-rw-r--r-- | src/liblzma/check/crc32_fast.c | 47 |
-rw-r--r-- | src/liblzma/check/crc32_table.c | 10 |
-rw-r--r-- | src/liblzma/check/crc32_tablegen.c | 2 |
-rw-r--r-- | src/liblzma/check/crc64_fast.c | 22 |
-rw-r--r-- | src/liblzma/check/crc64_table.c | 4 |
-rw-r--r-- | src/liblzma/check/crc64_tablegen.c | 2 |
-rw-r--r-- | src/liblzma/check/crc_common.h | 14 |
-rw-r--r-- | src/liblzma/check/crc_x86_clmul.h | 11 |
9 files changed, 37 insertions, 100 deletions
diff --git a/src/liblzma/check/crc32_arm64.h b/src/liblzma/check/crc32_arm64.h index 6cdb5dab32e6..39c1c63ec0ec 100644 --- a/src/liblzma/check/crc32_arm64.h +++ b/src/liblzma/check/crc32_arm64.h @@ -11,7 +11,6 @@ // /////////////////////////////////////////////////////////////////////////////// - #ifndef LZMA_CRC32_ARM64_H #define LZMA_CRC32_ARM64_H @@ -21,6 +20,8 @@ # include <arm_acle.h> #endif +// If both versions are going to be built, we need runtime detection +// to check if the instructions are supported. #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) # if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO) # include <sys/auxv.h> @@ -36,8 +37,7 @@ // // NOTE: Build systems check for this too, keep them in sync with this. #if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) -# define crc_attr_target \ - __attribute__((__target__("+crc"))) +# define crc_attr_target __attribute__((__target__("+crc"))) #else # define crc_attr_target #endif @@ -51,7 +51,7 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc) // Align the input buffer because this was shown to be // significantly faster than unaligned accesses. - const size_t align_amount = my_min(size, (8 - (uintptr_t)buf) & 7); + const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7); for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf) crc = __crc32b(crc, *buf); @@ -62,7 +62,7 @@ crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc) // ignoring the least significant three bits of size to ensure // we do not process past the bounds of the buffer. This guarantees // that limit is a multiple of 8 and is strictly less than size. 
- for (const uint8_t *limit = buf + (size & ~((size_t)7)); + for (const uint8_t *limit = buf + (size & ~(size_t)7); buf < limit; buf += 8) crc = __crc32d(crc, aligned_read64le(buf)); @@ -84,8 +84,10 @@ is_arch_extension_supported(void) #elif defined(HAVE_ELF_AUX_INFO) unsigned long feature_flags; - elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)); - return feature_flags & HWCAP_CRC32 != 0; + if (elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)) != 0) + return false; + + return (feature_flags & HWCAP_CRC32) != 0; #elif defined(_WIN32) return IsProcessorFeaturePresent( @@ -98,11 +100,12 @@ is_arch_extension_supported(void) // The sysctlbyname() function requires a string identifier for the // CPU feature it tests. The Apple documentation lists the string // "hw.optional.armv8_crc32", which can be found here: - // (https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619) - int err = sysctlbyname("hw.optional.armv8_crc32", &has_crc32, - &size, NULL, 0); + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619 + if (sysctlbyname("hw.optional.armv8_crc32", &has_crc32, + &size, NULL, 0) != 0) + return false; - return !err && has_crc32; + return has_crc32; #else // If a runtime detection method cannot be found, then this must diff --git a/src/liblzma/check/crc32_fast.c b/src/liblzma/check/crc32_fast.c index 5e26914a4d1b..16dbb7467513 100644 --- a/src/liblzma/check/crc32_fast.c +++ b/src/liblzma/check/crc32_fast.c @@ -34,7 +34,7 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) crc = ~crc; #ifdef WORDS_BIGENDIAN - crc = bswap32(crc); + crc = byteswap32(crc); #endif if (size > 8) { @@ -80,7 +80,7 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); #ifdef WORDS_BIGENDIAN - crc = bswap32(crc); + crc = byteswap32(crc); #endif return ~crc; @@ 
-97,24 +97,14 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) // If both the generic and arch-optimized implementations are built, then // the function to use is selected at runtime because the system running // the binary might not have the arch-specific instruction set extension(s) -// available. The three dispatch methods in order of priority: +// available. The dispatch methods in order of priority: // -// 1. Indirect function (ifunc). This method is slightly more efficient -// than the constructor method because it will change the entry in the -// Procedure Linkage Table (PLT) for the function either at load time or -// at the first call. This avoids having to call the function through a -// function pointer and will treat the function call like a regular call -// through the PLT. ifuncs are created by using -// __attribute__((__ifunc__("resolver"))) on a function which has no -// body. The "resolver" is the name of the function that chooses at -// runtime which implementation to use. -// -// 2. Constructor. This method uses __attribute__((__constructor__)) to +// 1. Constructor. This method uses __attribute__((__constructor__)) to // set crc32_func at load time. This avoids extra computation (and any // unlikely threading bugs) on the first call to lzma_crc32() to decide // which implementation should be used. // -// 3. First Call Resolution. On the very first call to lzma_crc32(), the +// 2. First Call Resolution. On the very first call to lzma_crc32(), the // call will be directed to crc32_dispatch() instead. This will set the // appropriate implementation function and will not be called again. 
// This method does not use any kind of locking but is safe because if @@ -124,17 +114,7 @@ crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) typedef uint32_t (*crc32_func_type)( const uint8_t *buf, size_t size, uint32_t crc); -// Clang 16.0.0 and older has a bug where it marks the ifunc resolver -// function as unused since it is static and never used outside of -// __attribute__((__ifunc__())). -#if defined(CRC_USE_IFUNC) && defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - -// This resolver is shared between all three dispatch methods. It serves as -// the ifunc resolver if ifunc is supported, otherwise it is called as a -// regular function by the constructor or first call resolution methods. +// This resolver is shared between all dispatch methods. static crc32_func_type crc32_resolve(void) { @@ -142,11 +122,6 @@ crc32_resolve(void) ? &crc32_arch_optimized : &crc32_generic; } -#if defined(CRC_USE_IFUNC) && defined(__clang__) -# pragma GCC diagnostic pop -#endif - -#ifndef CRC_USE_IFUNC #ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR // Constructor method. @@ -171,8 +146,7 @@ crc32_set_func(void) static uint32_t crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc) { - // When __attribute__((__ifunc__(...))) and - // __attribute__((__constructor__)) isn't supported, set the + // When __attribute__((__constructor__)) isn't supported, set the // function pointer without any locking. If multiple threads run // the detection code in parallel, they will all end up setting // the pointer to the same value. 
This avoids the use of @@ -184,14 +158,8 @@ crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc) #endif #endif -#endif -#ifdef CRC_USE_IFUNC -extern LZMA_API(uint32_t) -lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) - __attribute__((__ifunc__("crc32_resolve"))); -#else extern LZMA_API(uint32_t) lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) { @@ -234,4 +202,3 @@ lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) return crc32_generic(buf, size, crc); #endif } -#endif diff --git a/src/liblzma/check/crc32_table.c b/src/liblzma/check/crc32_table.c index fb1b6585422a..c141cefe5a40 100644 --- a/src/liblzma/check/crc32_table.c +++ b/src/liblzma/check/crc32_table.c @@ -17,18 +17,16 @@ #if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \ && defined(__SSE4_1__) && defined(__PCLMUL__)) \ || (defined(__e2k__) && __iset__ >= 6)) -# define X86_CLMUL_NO_TABLE 1 -#endif +# define NO_CRC32_TABLE -#if defined(HAVE_ARM64_CRC32) \ +#elif defined(HAVE_ARM64_CRC32) \ && !defined(WORDS_BIGENDIAN) \ && defined(__ARM_FEATURE_CRC32) -# define ARM64_CRC32_NO_TABLE 1 +# define NO_CRC32_TABLE #endif -#if !defined(HAVE_ENCODERS) && (defined(X86_CLMUL_NO_TABLE) \ - || defined(ARM64_CRC32_NO_TABLE_)) +#if !defined(HAVE_ENCODERS) && defined(NO_CRC32_TABLE) // No table needed. Use a typedef to avoid an empty translation unit. 
typedef void lzma_crc32_dummy; diff --git a/src/liblzma/check/crc32_tablegen.c b/src/liblzma/check/crc32_tablegen.c index 01047d3eca47..b8cf459f8e76 100644 --- a/src/liblzma/check/crc32_tablegen.c +++ b/src/liblzma/check/crc32_tablegen.c @@ -43,7 +43,7 @@ init_crc32_table(void) #ifdef WORDS_BIGENDIAN for (size_t s = 0; s < 8; ++s) for (size_t b = 0; b < 256; ++b) - crc32_table[s][b] = bswap32(crc32_table[s][b]); + crc32_table[s][b] = byteswap32(crc32_table[s][b]); #endif return; diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c index f29fe3d3c5e6..0ce83fe4ad36 100644 --- a/src/liblzma/check/crc64_fast.c +++ b/src/liblzma/check/crc64_fast.c @@ -39,7 +39,7 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) crc = ~crc; #ifdef WORDS_BIGENDIAN - crc = bswap64(crc); + crc = byteswap64(crc); #endif if (size > 4) { @@ -73,7 +73,7 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); #ifdef WORDS_BIGENDIAN - crc = bswap64(crc); + crc = byteswap64(crc); #endif return ~crc; @@ -93,11 +93,6 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc) typedef uint64_t (*crc64_func_type)( const uint8_t *buf, size_t size, uint64_t crc); -#if defined(CRC_USE_IFUNC) && defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - static crc64_func_type crc64_resolve(void) { @@ -105,12 +100,6 @@ crc64_resolve(void) ? 
&crc64_arch_optimized : &crc64_generic; } -#if defined(CRC_USE_IFUNC) && defined(__clang__) -# pragma GCC diagnostic pop -#endif - -#ifndef CRC_USE_IFUNC - #ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR # define CRC64_SET_FUNC_ATTR __attribute__((__constructor__)) static crc64_func_type crc64_func; @@ -139,14 +128,8 @@ crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc) } #endif #endif -#endif -#ifdef CRC_USE_IFUNC -extern LZMA_API(uint64_t) -lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) - __attribute__((__ifunc__("crc64_resolve"))); -#else extern LZMA_API(uint64_t) lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) { @@ -171,4 +154,3 @@ lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) return crc64_generic(buf, size, crc); #endif } -#endif diff --git a/src/liblzma/check/crc64_table.c b/src/liblzma/check/crc64_table.c index 6dee387a1fcf..78e427597ce6 100644 --- a/src/liblzma/check/crc64_table.c +++ b/src/liblzma/check/crc64_table.c @@ -17,11 +17,11 @@ #if defined(HAVE_USABLE_CLMUL) && ((defined(__x86_64__) && defined(__SSSE3__) \ && defined(__SSE4_1__) && defined(__PCLMUL__)) \ || (defined(__e2k__) && __iset__ >= 6)) -# define X86_CLMUL_NO_TABLE 1 +# define NO_CRC64_TABLE #endif -#ifdef X86_CLMUL_NO_TABLE +#ifdef NO_CRC64_TABLE // No table needed. Use a typedef to avoid an empty translation unit. 
typedef void lzma_crc64_dummy; diff --git a/src/liblzma/check/crc64_tablegen.c b/src/liblzma/check/crc64_tablegen.c index af93e011ca21..2035127a1123 100644 --- a/src/liblzma/check/crc64_tablegen.c +++ b/src/liblzma/check/crc64_tablegen.c @@ -42,7 +42,7 @@ init_crc64_table(void) #ifdef WORDS_BIGENDIAN for (size_t s = 0; s < 4; ++s) for (size_t b = 0; b < 256; ++b) - crc64_table[s][b] = bswap64(crc64_table[s][b]); + crc64_table[s][b] = byteswap64(crc64_table[s][b]); #endif return; diff --git a/src/liblzma/check/crc_common.h b/src/liblzma/check/crc_common.h index 856665db79a8..63a7b5cefebf 100644 --- a/src/liblzma/check/crc_common.h +++ b/src/liblzma/check/crc_common.h @@ -67,8 +67,6 @@ #undef CRC32_ARM64 #undef CRC64_ARM64_CLMUL -#undef CRC_USE_IFUNC - #undef CRC_USE_GENERIC_FOR_SMALL_INPUTS // ARM64 CRC32 instruction is only useful for CRC32. Currently, only @@ -76,7 +74,7 @@ // endian machine. // // NOTE: Keep this and the next check in sync with the macro -// ARM64_CRC32_NO_TABLE in crc32_table.c +// NO_CRC32_TABLE in crc32_table.c #if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN) // Allow ARM64 CRC32 instruction without a runtime check if // __ARM_FEATURE_CRC32 is defined. GCC and Clang only define this if the @@ -96,7 +94,8 @@ // generic version can be omitted. Note that this doesn't work with MSVC // as I don't know how to detect the features here. // -// NOTE: Keep this in sync with the CLMUL_NO_TABLE macro in crc32_table.c. +// NOTE: Keep this in sync with the NO_CRC32_TABLE macro in crc32_table.c +// and NO_CRC64_TABLE in crc64_table.c. 
# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \ || (defined(__e2k__) && __iset__ >= 6) # define CRC32_ARCH_OPTIMIZED 1 @@ -109,9 +108,6 @@ # define CRC64_ARCH_OPTIMIZED 1 # define CRC_X86_CLMUL 1 -# ifdef HAVE_FUNC_ATTRIBUTE_IFUNC -# define CRC_USE_IFUNC 1 -# endif /* // The generic code is much faster with 1-8-byte inputs and // has similar performance up to 16 bytes at least in @@ -121,9 +117,7 @@ // for bigger inputs. It saves a little in code size since // the special cases for 0-16-byte inputs will be omitted // from the CLMUL code. -# ifndef CRC_USE_IFUNC -# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1 -# endif +# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1 */ # endif #endif diff --git a/src/liblzma/check/crc_x86_clmul.h b/src/liblzma/check/crc_x86_clmul.h index ae66ca9f8c71..f1254ece18ed 100644 --- a/src/liblzma/check/crc_x86_clmul.h +++ b/src/liblzma/check/crc_x86_clmul.h @@ -385,15 +385,8 @@ crc64_arch_optimized(const uint8_t *buf, size_t size, uint64_t crc) #endif // BUILDING_CRC64_CLMUL -// is_arch_extension_supported() must be inlined in this header file because -// the ifunc resolver function may not support calling a function in another -// translation unit. Depending on compiler-toolchain and flags, a call to -// a function defined in another translation unit could result in a -// reference to the PLT, which is unsafe to do in an ifunc resolver. The -// ifunc resolver runs very early when loading a shared library, so the PLT -// entries may not be setup at that time. Inlining this function duplicates -// the function body in crc32_resolve() and crc64_resolve(), but this is -// acceptable because the function results in very few instructions. +// Inlining this function duplicates the function body in crc32_resolve() and +// crc64_resolve(), but this is acceptable because this is a tiny function. static inline bool is_arch_extension_supported(void) { |