src - FreeBSD source tree

diff options


context:
space:
mode:

author	Warner Losh <imp@FreeBSD.org>	2025-02-05 23:20:13 +0000
committer	Warner Losh <imp@FreeBSD.org>	2025-02-05 23:20:13 +0000
commit	48ec896efb0b78141df004eaa21288b84590c9da (patch)
tree	33799792fd95c266d472ab1ae51d50ab4f942eb3 /include/jemalloc/internal/jemalloc_internal_inlines_c.h
parent	d28d7fbede216494aa3942af042cc084fcd6098a (diff)

jemalloc: Import 5.3.0 54eaed1d8b56b1aa528be3bdd1877e59c56fa90cvendor/jemalloc/5.3.0 vendor/jemalloc

Import jemalloc 5.3.0. This import changes how manage the jemalloc vendor branch (which was just started anyway). Starting with 5.3.0, we import a clean tree from the upstream github, removing all the old files that are no longer upstream, or that we've kept around for some reason. We do this because we merge from this raw version of jemalloc into the FreeBSD contrib/jemalloc, then we run autogen stuff, generate all the generated .h files with gmake, then finally remove much of the generated files in contrib/jemalloc using an update script. Sponsored by: Netflix

Diffstat (limited to 'include/jemalloc/internal/jemalloc_internal_inlines_c.h')

-rw-r--r--

include/jemalloc/internal/jemalloc_internal_inlines_c.h

124

1 files changed, 121 insertions, 3 deletions

diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index cdb10eb21f73..b0868b7d616b 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h

@@ -3,7 +3,9 @@

#include "jemalloc/internal/hook.h"

#include "jemalloc/internal/jemalloc_internal_types.h"

+#include "jemalloc/internal/log.h"

#include "jemalloc/internal/sz.h"

+#include "jemalloc/internal/thread_event.h"

#include "jemalloc/internal/witness.h"

@@ -101,8 +103,8 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) {

}

JEMALLOC_ALWAYS_INLINE void

-idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,

- bool is_internal, bool slow_path) {

+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,

+ emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) {

assert(ptr != NULL);

assert(!is_internal || tcache == NULL);

assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));

@@ -125,7 +127,7 @@ idalloc(tsd_t *tsd, void *ptr) {

JEMALLOC_ALWAYS_INLINE void

isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,

- alloc_ctx_t *alloc_ctx, bool slow_path) {

+ emap_alloc_ctx_t *alloc_ctx, bool slow_path) {

witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),

WITNESS_RANK_CORE, 0);

arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);

@@ -219,4 +221,120 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,

newsize);

}

+JEMALLOC_ALWAYS_INLINE void

+fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,

+ cache_bin_t *bin, void *ret) {

+ thread_allocated_set(tsd, allocated_after);

+ if (config_stats) {

+ bin->tstats.nrequests++;

+ }

+ LOG("core.malloc.exit", "result: %p", ret);

+JEMALLOC_ALWAYS_INLINE bool

+malloc_initialized(void) {

+ return (malloc_init_state == malloc_init_initialized);

+/*

+ * malloc() fastpath. Included here so that we can inline it into operator new;

+ * function call overhead there is non-negligible as a fraction of total CPU in

+ * allocation-heavy C++ programs. We take the fallback alloc to allow malloc

+ * (which can return NULL) to differ in its behavior from operator new (which

+ * can't). It matches the signature of malloc / operator new so that we can

+ * tail-call the fallback allocator, allowing us to avoid setting up the call

+ * frame in the common case.

+ *

+ * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit

+ * tcache. If either of these is false, we tail-call to the slowpath,

+ * malloc_default(). Tail-calling is used to avoid any caller-saved

+ * registers.

+ *

+ * fastpath supports ticker and profiling, both of which will also

+ * tail-call to the slowpath if they fire.

+ */

+JEMALLOC_ALWAYS_INLINE void *

+imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {

+ LOG("core.malloc.entry", "size: %zu", size);

+ if (tsd_get_allocates() && unlikely(!malloc_initialized())) {

+ return fallback_alloc(size);

+ }

+ tsd_t *tsd = tsd_get(false);

+ if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) {

+ return fallback_alloc(size);

+ }

+ /*

+ * The code below till the branch checking the next_event threshold may

+ * execute before malloc_init(), in which case the threshold is 0 to

+ * trigger slow path and initialization.

+ *

+ * Note that when uninitialized, only the fast-path variants of the sz /

+ * tsd facilities may be called.

+ */

+ szind_t ind;

+ /*

+ * The thread_allocated counter in tsd serves as a general purpose

+ * accumulator for bytes of allocation to trigger different types of

+ * events. usize is always needed to advance thread_allocated, though

+ * it's not always needed in the core allocation logic.

+ */

+ size_t usize;

+ sz_size2index_usize_fastpath(size, &ind, &usize);

+ /* Fast path relies on size being a bin. */

+ assert(ind < SC_NBINS);

+ assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&

+ (size <= SC_SMALL_MAXCLASS));

+ uint64_t allocated, threshold;

+ te_malloc_fastpath_ctx(tsd, &allocated, &threshold);

+ uint64_t allocated_after = allocated + usize;

+ /*

+ * The ind and usize might be uninitialized (or partially) before

+ * malloc_init(). The assertions check for: 1) full correctness (usize

+ * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0)

+ * when !initialized.

+ */

+ if (!malloc_initialized()) {

+ assert(threshold == 0);

+ } else {

+ assert(ind == sz_size2index(size));

+ assert(usize > 0 && usize == sz_index2size(ind));

+ }

+ /*

+ * Check for events and tsd non-nominal (fast_threshold will be set to

+ * 0) in a single branch.

+ */

+ if (unlikely(allocated_after >= threshold)) {

+ return fallback_alloc(size);

+ }

+ assert(tsd_fast(tsd));

+ tcache_t *tcache = tsd_tcachep_get(tsd);

+ assert(tcache == tcache_get(tsd));

+ cache_bin_t *bin = &tcache->bins[ind];

+ bool tcache_success;

+ void *ret;

+ /*

+ * We split up the code this way so that redundant low-water

+ * computation doesn't happen on the (more common) case in which we

+ * don't touch the low water mark. The compiler won't do this

+ * duplication on its own.

+ */

+ ret = cache_bin_alloc_easy(bin, &tcache_success);

+ if (tcache_success) {

+ fastpath_success_finish(tsd, allocated_after, bin, ret);

+ return ret;

+ }

+ ret = cache_bin_alloc(bin, &tcache_success);

+ if (tcache_success) {

+ fastpath_success_finish(tsd, allocated_after, bin, ret);

+ return ret;

+ }

+ return fallback_alloc(size);

#endif /* JEMALLOC_INTERNAL_INLINES_C_H */