aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-03-20 21:19:27 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-03-20 21:25:35 +0000
commitbfffb66e7f3cf9766e45a23080200c18535c2b85 (patch)
tree50722022cd29ef0d3f728dfa8ac55eed6d7facf3
parent95e96c00fd5ae57a9c3a0c4032822170c2fc4d13 (diff)
downloadsrc-bfffb66e7f3cf9766e45a23080200c18535c2b85.tar.gz
src-bfffb66e7f3cf9766e45a23080200c18535c2b85.zip
Reapply libcxxrt atomics cleanup commit, preparing for upstream fix
This reapplies upstream commit fd484be, as there is a follow-up fix for the possible hangs in __cxa_guard_acquire() on i386: Atomics cleanup (#11) We need to test exception specifiers but they're gone in C++17 so compile the tests with an older version of the standard. Rewrite the guard logic to be more idiomatic C++ and more comprehensible and make sure that atomics are used where necessary. Obtained from: https://github.com/libcxxrt/libcxxrt/commit/fd484be Fixes: 56aaed388b0a MFC after: 2 weeks
-rw-r--r--contrib/libcxxrt/atomic.h110
-rw-r--r--contrib/libcxxrt/exception.cc21
-rw-r--r--contrib/libcxxrt/guard.cc368
-rw-r--r--contrib/libcxxrt/memory.cc7
4 files changed, 374 insertions, 132 deletions
diff --git a/contrib/libcxxrt/atomic.h b/contrib/libcxxrt/atomic.h
index 131ca9f57798..701d05337cf1 100644
--- a/contrib/libcxxrt/atomic.h
+++ b/contrib/libcxxrt/atomic.h
@@ -1,30 +1,102 @@
#ifndef __has_builtin
-#define __has_builtin(x) 0
+# define __has_builtin(x) 0
#endif
#ifndef __has_feature
-#define __has_feature(x) 0
+# define __has_feature(x) 0
+#endif
+#ifndef __has_extension
+# define __has_extension(x) 0
+#endif
+
+#if !__has_extension(c_atomic)
+# define _Atomic(T) T
#endif
-/**
- * Swap macro that enforces a happens-before relationship with a corresponding
- * ATOMIC_LOAD.
- */
#if __has_builtin(__c11_atomic_exchange)
-#define ATOMIC_SWAP(addr, val)\
- __c11_atomic_exchange(reinterpret_cast<_Atomic(__typeof__(val))*>(addr), val, __ATOMIC_ACQ_REL)
-#elif __has_builtin(__sync_swap)
-#define ATOMIC_SWAP(addr, val)\
- __sync_swap(addr, val)
+# define ATOMIC_BUILTIN(name) __c11_atomic_##name
#else
-#define ATOMIC_SWAP(addr, val)\
- __sync_lock_test_and_set(addr, val)
+# define ATOMIC_BUILTIN(name) __atomic_##name##_n
#endif
-#if __has_builtin(__c11_atomic_load)
-#define ATOMIC_LOAD(addr)\
- __c11_atomic_load(reinterpret_cast<_Atomic(__typeof__(*addr))*>(addr), __ATOMIC_ACQUIRE)
+namespace
+{
+ /**
+ * C++11 memory orders. We only need a subset of them.
+ *
+ * Each enumerator has the value of the matching compiler `__ATOMIC_*`
+ * constant, so an enumerator can be handed to the atomic builtins
+ * unchanged.
+ */
+ enum memory_order
+ {
+ /**
+ * Acquire order.
+ */
+ acquire = __ATOMIC_ACQUIRE,
+
+ /**
+ * Release order.
+ */
+ release = __ATOMIC_RELEASE,
+
+ /**
+ * Sequentially consistent memory ordering.
+ */
+ seqcst = __ATOMIC_SEQ_CST
+ };
+
+ /**
+ * Atomic, implements a subset of `std::atomic`.
+ *
+ * Every operation takes an optional memory order and defaults to
+ * sequentially consistent ordering when none is given.
+ */
+ template<typename T>
+ class atomic
+ {
+ /**
+ * The underlying value. Use C11 atomic qualification if available.
+ */
+ _Atomic(T) val;
+
+ public:
+ /**
+ * Constructor, takes a value.
+ */
+ atomic(T init) : val(init) {}
+
+ /**
+ * Atomically load with the specified memory order.
+ */
+ T load(memory_order order = memory_order::seqcst)
+ {
+ return ATOMIC_BUILTIN(load)(&val, order);
+ }
+
+ /**
+ * Atomically store with the specified memory order.
+ */
+ void store(T v, memory_order order = memory_order::seqcst)
+ {
+ return ATOMIC_BUILTIN(store)(&val, v, order);
+ }
+
+ /**
+ * Atomically exchange with the specified memory order. Returns the
+ * value that was stored before the exchange.
+ */
+ T exchange(T v, memory_order order = memory_order::seqcst)
+ {
+ return ATOMIC_BUILTIN(exchange)(&val, v, order);
+ }
+
+ /**
+ * Atomically compare and exchange (strong form) with the specified
+ * memory order.
+ *
+ * If the stored value equals `expected`, it is replaced by `desired`
+ * and true is returned. Otherwise `expected` is overwritten with the
+ * value that was observed and false is returned.
+ *
+ * `order` is used as both the success and the failure order, so it
+ * must be an order that is valid for the failure case (the builtins
+ * do not accept a release order there).
+ */
+ bool compare_exchange(T & expected,
+ T desired,
+ memory_order order = memory_order::seqcst)
+ {
+#if __has_builtin(__c11_atomic_compare_exchange_strong)
+ return __c11_atomic_compare_exchange_strong(
+ &val, &expected, desired, order, order);
#else
-#define ATOMIC_LOAD(addr)\
- (__sync_synchronize(), *addr)
+ // Pass `false` for the `weak` argument so that this branch is a
+ // strong compare-exchange like the __c11 branch above and never
+ // fails spuriously.
+ return __atomic_compare_exchange_n(
+ &val, &expected, desired, false, order, order);
#endif
-
+ }
+ };
+} // namespace
+#undef ATOMIC_BUILTIN
diff --git a/contrib/libcxxrt/exception.cc b/contrib/libcxxrt/exception.cc
index 0fb26ddb4ed2..2f1dc4030ba4 100644
--- a/contrib/libcxxrt/exception.cc
+++ b/contrib/libcxxrt/exception.cc
@@ -1,5 +1,6 @@
/*
* Copyright 2010-2011 PathScale, Inc. All rights reserved.
+ * Copyright 2021 David Chisnall. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -289,9 +290,9 @@ using namespace ABI_NAMESPACE;
/** The global termination handler. */
-static terminate_handler terminateHandler = abort;
+static atomic<terminate_handler> terminateHandler = abort;
/** The global unexpected exception handler. */
-static unexpected_handler unexpectedHandler = std::terminate;
+static atomic<unexpected_handler> unexpectedHandler = std::terminate;
/** Key used for thread-local data. */
static pthread_key_t eh_key;
@@ -744,12 +745,12 @@ static void throw_exception(__cxa_exception *ex)
ex->unexpectedHandler = info->unexpectedHandler;
if (0 == ex->unexpectedHandler)
{
- ex->unexpectedHandler = unexpectedHandler;
+ ex->unexpectedHandler = unexpectedHandler.load();
}
ex->terminateHandler = info->terminateHandler;
if (0 == ex->terminateHandler)
{
- ex->terminateHandler = terminateHandler;
+ ex->terminateHandler = terminateHandler.load();
}
info->globals.uncaughtExceptions++;
@@ -1449,7 +1450,7 @@ namespace std
{
if (thread_local_handlers) { return pathscale::set_unexpected(f); }
- return ATOMIC_SWAP(&unexpectedHandler, f);
+ return unexpectedHandler.exchange(f);
}
/**
* Sets the function that is called to terminate the program.
@@ -1458,7 +1459,7 @@ namespace std
{
if (thread_local_handlers) { return pathscale::set_terminate(f); }
- return ATOMIC_SWAP(&terminateHandler, f);
+ return terminateHandler.exchange(f);
}
/**
* Terminates the program, calling a custom terminate implementation if
@@ -1474,7 +1475,7 @@ namespace std
// return.
abort();
}
- terminateHandler();
+ terminateHandler.load()();
}
/**
* Called when an unexpected exception is encountered (i.e. an exception
@@ -1491,7 +1492,7 @@ namespace std
// return.
abort();
}
- unexpectedHandler();
+ unexpectedHandler.load()();
}
/**
* Returns whether there are any exceptions currently being thrown that
@@ -1521,7 +1522,7 @@ namespace std
{
return info->unexpectedHandler;
}
- return ATOMIC_LOAD(&unexpectedHandler);
+ return unexpectedHandler.load();
}
/**
* Returns the current terminate handler.
@@ -1533,7 +1534,7 @@ namespace std
{
return info->terminateHandler;
}
- return ATOMIC_LOAD(&terminateHandler);
+ return terminateHandler.load();
}
}
#if defined(__arm__) && !defined(__ARM_DWARF_EH__)
diff --git a/contrib/libcxxrt/guard.cc b/contrib/libcxxrt/guard.cc
index 34d294cf7432..515992563a10 100644
--- a/contrib/libcxxrt/guard.cc
+++ b/contrib/libcxxrt/guard.cc
@@ -1,5 +1,6 @@
-/*
+/*
* Copyright 2010-2012 PathScale, Inc. All rights reserved.
+ * Copyright 2021 David Chisnall. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -10,7 +11,7 @@
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -38,126 +39,305 @@
* value as a low-overhead lock. Because statics (in most sane code) are
* accessed far more times than they are initialised, this lock implementation
* is heavily optimised towards the case where the static has already been
- * initialised.
+ * initialised.
*/
+#include "atomic.h"
+#include <assert.h>
+#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
-#include <stdio.h>
-#include <pthread.h>
-#include <assert.h>
-#include "atomic.h"
// Older GCC doesn't define __LITTLE_ENDIAN__
#ifndef __LITTLE_ENDIAN__
- // If __BYTE_ORDER__ is defined, use that instead
+// If __BYTE_ORDER__ is defined, use that instead
# ifdef __BYTE_ORDER__
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define __LITTLE_ENDIAN__
# endif
- // x86 and ARM are the most common little-endian CPUs, so let's have a
- // special case for them (ARM is already special cased). Assume everything
- // else is big endian.
+// x86 and ARM are the most common little-endian CPUs, so let's have a
+// special case for them (ARM is already special cased). Assume everything
+// else is big endian.
# elif defined(__x86_64) || defined(__i386)
# define __LITTLE_ENDIAN__
# endif
#endif
-
/*
- * The least significant bit of the guard variable indicates that the object
- * has been initialised, the most significant bit is used for a spinlock.
+ * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32)
+ * values with one bit defined to indicate that the guarded variable is
+ * initialised and another bit to indicate that it's currently locked
+ * (initialisation in progress). The bit to use depends on the byte order of
+ * the target.
+ *
+ * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we
+ * treat the two halves of the 64-bit word as independent values and use one
+ * half for the initialised flag and the other half for the lock.
*/
+namespace
+{
+ /**
+ * The state of the guard variable when an attempt is made to lock it.
+ * This is the result type of the guards' `try_lock()` methods.
+ */
+ enum class GuardState
+ {
+ /**
+ * The lock is not held but is not needed because initialisation is
+ * done.
+ */
+ InitDone,
+
+ /**
+ * Initialisation is not done but the lock is held by the caller.
+ */
+ InitLockSucceeded,
+
+ /**
+ * Attempting to acquire the lock failed.
+ */
+ InitLockFailed
+ };
+
+ /**
+ * Class encapsulating a single atomic word being used to represent the
+ * guard. The word size is defined by the type of `GuardWord`. The bit
+ * used to indicate the locked state is `1<<LockedBit`, the bit used to
+ * indicate the initialised state is `1<<InitBit`.
+ */
+ template<typename GuardWord, int LockedBit, int InitBit>
+ struct SingleWordGuard
+ {
+ /**
+ * The value indicating that the lock bit is set (and no other bits).
+ */
+ static constexpr GuardWord locked = static_cast<GuardWord>(1)
+ << LockedBit;
+
+ /**
+ * The value indicating that the initialised bit is set (and all other
+ * bits are zero).
+ */
+ static constexpr GuardWord initialised = static_cast<GuardWord>(1)
+ << InitBit;
+
+ /**
+ * The guard variable.
+ */
+ atomic<GuardWord> val;
+
+ public:
+ /**
+ * Release the lock and set the initialised state. In the single-word
+ * implementation here, these are both done by a single store.
+ *
+ * Passing false writes 0, leaving the guard unlocked and
+ * uninitialised.
+ */
+ void unlock(bool isInitialised)
+ {
+ val.store(isInitialised ? initialised : 0, memory_order::release);
+#ifndef NDEBUG
+ // Debug-only sanity check of the template arguments: the first byte
+ // in memory of the `initialised` constant must be non-zero.
+ GuardWord init_state = initialised;
+ assert(*reinterpret_cast<uint8_t*>(&init_state) != 0);
+#endif
+ }
+
+ /**
+ * Try to acquire the lock. This has a tri-state return, indicating
+ * either that the lock was acquired, it wasn't acquired because it was
+ * contended, or it wasn't acquired because the guarded variable is
+ * already initialised.
+ */
+ GuardState try_lock()
+ {
+ GuardWord old = 0;
+ // Try to acquire the lock, assuming that we are in the state where
+ // the lock is not held and the variable is not initialised (so the
+ // expected value is 0).
+ // The compare-exchange uses its default (sequentially consistent)
+ // order; on failure `old` receives the value found in the guard.
+ if (val.compare_exchange(old, locked))
+ {
+ return GuardState::InitLockSucceeded;
+ }
+ // If the CAS failed and the old value indicates that this is
+ // initialised, return that initialisation is done and skip further
+ // retries.
+ if (old == initialised)
+ {
+ return GuardState::InitDone;
+ }
+ // Otherwise, report failure.
+ return GuardState::InitLockFailed;
+ }
+
+ /**
+ * Check whether the guard indicates that the variable is initialised.
+ */
+ bool is_initialised()
+ {
+ // Acquire load; unlock() writes the flag with a release store.
+ return (val.load(memory_order::acquire) & initialised) ==
+ initialised;
+ }
+ };
+
+ /**
+ * Class encapsulating using two 32-bit atomic values to represent a 64-bit
+ * guard variable.
+ *
+ * The lock word is 0 when the lock is free and `locked` while it is held.
+ * The initialised flag lives in a separate word and is only ever written
+ * by the thread that currently holds the lock.
+ */
+ template<int LockedBit, int InitBit>
+ class DoubleWordGuard
+ {
+ /**
+ * The value of `lock_word` when the lock is held.
+ */
+ static constexpr uint32_t locked = static_cast<uint32_t>(1)
+ << LockedBit;
+
+ /**
+ * The value of `init_word` when the guarded variable is initialised.
+ */
+ static constexpr uint32_t initialised = static_cast<uint32_t>(1)
+ << InitBit;
+
+ /**
+ * The word used for the initialised flag. This is always the first
+ * word irrespective of endian because the generated code compares the
+ * first byte in memory against 0.
+ */
+ atomic<uint32_t> init_word;
+
+ /**
+ * The word used for the lock.
+ */
+ atomic<uint32_t> lock_word;
+
+ public:
+ /**
+ * Try to acquire the lock. This has a tri-state return, indicating
+ * either that the lock was acquired, it wasn't acquired because it was
+ * contended, or it wasn't acquired because the guarded variable is
+ * already initialised.
+ *
+ * When this returns `InitDone` or `InitLockFailed` the caller does not
+ * hold the lock.
+ */
+ GuardState try_lock()
+ {
+ uint32_t old = 0;
+ // Try to acquire the lock
+ if (lock_word.compare_exchange(old, locked))
+ {
+ // If we succeeded, check if initialisation has happened. In
+ // this version, we don't have atomic manipulation of both the
+ // lock and initialised bits together. Instead, we have an
+ // ordering rule that the initialised bit is only ever updated
+ // with the lock held.
+ if (is_initialised())
+ {
+ // If another thread did manage to initialise this, release
+ // the lock and notify the caller that initialisation is
+ // done. The lock word must be reset to 0 (unlocked), not
+ // to `initialised`: any non-zero value left here makes
+ // every later compare-exchange from 0 fail, so a thread
+ // that is already retrying try_lock() would never get a
+ // result other than InitLockFailed.
+ lock_word.store(0, memory_order::release);
+ return GuardState::InitDone;
+ }
+ return GuardState::InitLockSucceeded;
+ }
+ return GuardState::InitLockFailed;
+ }
+
+ /**
+ * Set the initialised state and release the lock. In this
+ * implementation, this is ordered, not atomic: the initialise bit is
+ * set while the lock is held.
+ */
+ void unlock(bool isInitialised)
+ {
+ init_word.store(isInitialised ? initialised : 0,
+ memory_order::release);
+ // Check the first byte of the guard while the lock is still held.
+ // Once the lock is released another thread may take it and change
+ // `init_word`, which would make this check race.
+ assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised);
+ lock_word.store(0, memory_order::release);
+ }
+
+ /**
+ * Return whether the guarded variable is initialised.
+ */
+ bool is_initialised()
+ {
+ // Acquire load; unlock() writes the flag with a release store.
+ return (init_word.load(memory_order::acquire) & initialised) ==
+ initialised;
+ }
+ };
+
+ // Check that the two implementations are the correct size: each guard
+ // class must occupy exactly the guard word it represents, because the
+ // entry points below access the guard word through a `Guard *`.
+ static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t),
+ "Single-word 32-bit guard must be 32 bits");
+ static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t),
+ "Single-word 64-bit guard must be 64 bits");
+ static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t),
+ "Double-word guard must be 64 bits");
+
#ifdef __arm__
-// ARM ABI - 32-bit guards.
-typedef uint32_t guard_t;
-typedef uint32_t guard_lock_t;
-static const uint32_t LOCKED = static_cast<guard_t>(1) << 31;
-static const uint32_t INITIALISED = 1;
-#define LOCK_PART(guard) (guard)
-#define INIT_PART(guard) (guard)
+ /**
+ * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words.
+ */
+ using Guard = SingleWordGuard<uint32_t, 31, 0>;
#elif defined(_LP64)
-typedef uint64_t guard_t;
-typedef uint64_t guard_lock_t;
# if defined(__LITTLE_ENDIAN__)
-static const guard_t LOCKED = static_cast<guard_t>(1) << 63;
-static const guard_t INITIALISED = 1;
+ /**
+ * On little-endian 64-bit platforms the guard word is a single 64-bit
+ * atomic with the lock in the high bit and the initialised flag in the low
+ * bit.
+ */
+ using Guard = SingleWordGuard<uint64_t, 63, 0>;
# else
-static const guard_t LOCKED = 1;
-static const guard_t INITIALISED = static_cast<guard_t>(1) << 56;
+ /**
+ * On big-endian 64-bit platforms, the guard word is a single 64-bit atomic
+ * with the lock in the low bit and the initialised bit in the highest
+ * byte.
+ */
+ using Guard = SingleWordGuard<uint64_t, 0, 56>;
# endif
-#define LOCK_PART(guard) (guard)
-#define INIT_PART(guard) (guard)
#else
-typedef uint32_t guard_lock_t;
# if defined(__LITTLE_ENDIAN__)
-typedef struct {
- uint32_t init_half;
- uint32_t lock_half;
-} guard_t;
-static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31;
-static const uint32_t INITIALISED = 1;
+ /**
+ * 32-bit platforms use the same layout as 64-bit.
+ */
+ using Guard = DoubleWordGuard<31, 0>;
# else
-typedef struct {
- uint32_t init_half;
- uint32_t lock_half;
-} guard_t;
-static_assert(sizeof(guard_t) == sizeof(uint64_t), "");
-static const uint32_t LOCKED = 1;
-static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24;
+ /**
+ * 32-bit platforms use the same layout as 64-bit.
+ */
+ using Guard = DoubleWordGuard<0, 24>;
# endif
-#define LOCK_PART(guard) (&(guard)->lock_half)
-#define INIT_PART(guard) (&(guard)->init_half)
#endif
-static const guard_lock_t INITIAL = 0;
+
+} // namespace
/**
* Acquires a lock on a guard, returning 0 if the object has already been
* initialised, and 1 if it has not. If the object is already constructed then
* this function just needs to read a byte from memory and return.
*/
-extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
+extern "C" int __cxa_guard_acquire(Guard *guard_object)
{
- guard_lock_t old;
- // Not an atomic read, doesn't establish a happens-before relationship, but
- // if one is already established and we end up seeing an initialised state
- // then it's a fast path, otherwise we'll do something more expensive than
- // this test anyway...
- if (INITIALISED == *INIT_PART(guard_object))
+ // Check if this is already initialised. If so, we don't have to do
+ // anything.
+ if (guard_object->is_initialised())
+ {
return 0;
- // Spin trying to do the initialisation
+ }
+ // Spin trying to acquire the lock. If we fail to acquire the lock the
+ // first time then another thread will *probably* initialise it, but if the
+ // constructor throws an exception then we may have to try again in this
+ // thread.
for (;;)
{
- // Loop trying to move the value of the guard from 0 (not
- // locked, not initialised) to the locked-uninitialised
- // position.
- old = __sync_val_compare_and_swap(LOCK_PART(guard_object),
- INITIAL, LOCKED);
- if (old == INITIAL) {
- // Lock obtained. If lock and init bit are
- // in separate words, check for init race.
- if (INIT_PART(guard_object) == LOCK_PART(guard_object))
- return 1;
- if (INITIALISED != *INIT_PART(guard_object))
+ // Try to acquire the lock.
+ switch (guard_object->try_lock())
+ {
+ // If another thread finished initialising the guarded variable
+ // while we were waiting, return immediately indicating that
+ // initialisation is not required.
+ case GuardState::InitDone:
+ return 0;
+ // If we acquired the lock, return immediately to start
+ // initialisation.
+ case GuardState::InitLockSucceeded:
return 1;
-
- // No need for a memory barrier here,
- // see first comment.
- *LOCK_PART(guard_object) = INITIAL;
- return 0;
+ // If we didn't acquire the lock, pause and retry.
+ // NOTE(review): the initialised flag is not re-checked on this
+ // path, so a waiting thread only learns that initialisation has
+ // finished from a later try_lock() call.
+ case GuardState::InitLockFailed:
+ break;
}
- // If lock and init bit are in the same word, check again
- // if we are done.
- if (INIT_PART(guard_object) == LOCK_PART(guard_object) &&
- old == INITIALISED)
- return 0;
-
- assert(old == LOCKED);
- // Another thread holds the lock.
- // If lock and init bit are in different words, check
- // if we are done before yielding and looping.
- if (INIT_PART(guard_object) != LOCK_PART(guard_object) &&
- INITIALISED == *INIT_PART(guard_object))
- return 0;
+ // Yield the processor before the next attempt.
sched_yield();
}
}
@@ -166,28 +346,16 @@ extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
* Releases the lock without marking the object as initialised. This function
* is called if initialising a static causes an exception to be thrown.
*/
-extern "C" void __cxa_guard_abort(volatile guard_t *guard_object)
+extern "C" void __cxa_guard_abort(Guard *guard_object)
{
- __attribute__((unused))
- bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object),
- LOCKED, INITIAL);
- assert(reset);
+ // Passing false leaves the guard unlocked and not initialised, so a
+ // later __cxa_guard_acquire() call can attempt the initialisation again.
+ guard_object->unlock(false);
}
+
/**
* Releases the guard and marks the object as initialised. This function is
* called after successful initialisation of a static.
*/
-extern "C" void __cxa_guard_release(volatile guard_t *guard_object)
+extern "C" void __cxa_guard_release(Guard *guard_object)
{
- guard_lock_t old;
- if (INIT_PART(guard_object) == LOCK_PART(guard_object))
- old = LOCKED;
- else
- old = INITIAL;
- __attribute__((unused))
- bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object),
- old, INITIALISED);
- assert(reset);
- if (INIT_PART(guard_object) != LOCK_PART(guard_object))
- *LOCK_PART(guard_object) = INITIAL;
+ // Passing true sets the initialised flag and drops the lock; the guard
+ // class decides whether that takes one store or two.
+ guard_object->unlock(true);
}
diff --git a/contrib/libcxxrt/memory.cc b/contrib/libcxxrt/memory.cc
index 6dd43a5b897e..7beb048ae914 100644
--- a/contrib/libcxxrt/memory.cc
+++ b/contrib/libcxxrt/memory.cc
@@ -51,7 +51,7 @@ typedef void (*new_handler)();
* The function to call when allocation fails. By default, there is no
* handler and a bad allocation exception is thrown if an allocation fails.
*/
-static new_handler new_handl;
+static atomic<new_handler> new_handl{nullptr};
namespace std
{
@@ -61,12 +61,13 @@ namespace std
__attribute__((weak))
new_handler set_new_handler(new_handler handler)
{
- return ATOMIC_SWAP(&new_handl, handler);
+ // Atomically install `handler` and hand back the handler that was
+ // stored before the exchange.
+ return new_handl.exchange(handler);
}
+
__attribute__((weak))
new_handler get_new_handler(void)
{
- return ATOMIC_LOAD(&new_handl);
+ // Atomic read of the currently installed handler (sequentially
+ // consistent, the default order of atomic<T>::load).
+ return new_handl.load();
}
}