Diffstat (limited to 'compiler-rt/lib/scudo/standalone')
39 files changed, 1803 insertions, 813 deletions
diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h index 12daaa2f6b44..e6f46b511dbf 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_config.h +++ b/compiler-rt/lib/scudo/standalone/allocator_config.h @@ -21,24 +21,65 @@ namespace scudo { +// The combined allocator uses a structure as a template argument that +// specifies the configuration options for the various subcomponents of the +// allocator. +// +// struct ExampleConfig { +// // SizeClassMap to use with the Primary. +// using SizeClassMap = DefaultSizeClassMap; +// // Indicates possible support for Memory Tagging. +// static const bool MaySupportMemoryTagging = false; +// // Defines the Primary allocator to use. +// typedef SizeClassAllocator64<ExampleConfig> Primary; +// // Log2 of the size of a size class region, as used by the Primary. +// static const uptr PrimaryRegionSizeLog = 30U; +// // Defines the type and scale of a compact pointer. A compact pointer can +// // be understood as the offset of a pointer within the region it belongs +// // to, in increments of a power-of-2 scale. +// // eg: Ptr = Base + (CompactPtr << Scale). +// typedef u32 PrimaryCompactPtrT; +// static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; +// // Indicates support for offsetting the start of a region by +// // a random number of pages. Only used with primary64. +// static const bool PrimaryEnableRandomOffset = true; +// // Call map for user memory with at least this size. Only used with +// // primary64. +// static const uptr PrimaryMapSizeIncrement = 1UL << 18; +// // Defines the minimal & maximal release interval that can be set. +// static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; +// static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; +// // Defines the type of cache used by the Secondary. Some additional +// // configuration entries can be necessary depending on the Cache. +// typedef MapAllocatorNoCache SecondaryCache; +// // Thread-Specific Data Registry used, shared or exclusive. +// template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; +// }; + // Default configurations for various platforms.
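As a concrete instance of the ExampleConfig contract documented above, here is a hedged sketch of a complete configuration (MyConfig and its field values are illustrative, assume a 64-bit target with SCUDO_CAN_USE_PRIMARY64, and are not tuning recommendations); the platform defaults follow below:

namespace scudo {
struct MyConfig {
  using SizeClassMap = DefaultSizeClassMap;
  static const bool MaySupportMemoryTagging = false;
  // 32-bit targets would use SizeClassAllocator32 instead.
  typedef SizeClassAllocator64<MyConfig> Primary;
  static const uptr PrimaryRegionSizeLog = 30U;
  // Compact pointers: Ptr = Base + (CompactPtr << Scale).
  typedef u32 PrimaryCompactPtrT;
  static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
  static const bool PrimaryEnableRandomOffset = true;
  static const uptr PrimaryMapSizeIncrement = 1UL << 18;
  static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
  static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
  // MapAllocatorNoCache requires no additional SecondaryCache* entries.
  typedef MapAllocatorNoCache SecondaryCache;
  template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>;
};
} // namespace scudo
// The combined allocator would then be instantiated as scudo::Allocator<MyConfig>.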
struct DefaultConfig { using SizeClassMap = DefaultSizeClassMap; - static const bool MaySupportMemoryTagging = false; + static const bool MaySupportMemoryTagging = true; #if SCUDO_CAN_USE_PRIMARY64 typedef SizeClassAllocator64<DefaultConfig> Primary; - static const uptr PrimaryRegionSizeLog = 30U; + static const uptr PrimaryRegionSizeLog = 32U; + typedef uptr PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = 0; + static const bool PrimaryEnableRandomOffset = true; + static const uptr PrimaryMapSizeIncrement = 1UL << 18; #else typedef SizeClassAllocator32<DefaultConfig> Primary; static const uptr PrimaryRegionSizeLog = 19U; + typedef uptr PrimaryCompactPtrT; #endif static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; typedef MapAllocatorCache<DefaultConfig> SecondaryCache; static const u32 SecondaryCacheEntriesArraySize = 32U; + static const u32 SecondaryCacheQuarantineSize = 0U; static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 19; static const s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN; @@ -46,7 +87,6 @@ struct DefaultConfig { template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive }; - struct AndroidConfig { using SizeClassMap = AndroidSizeClassMap; static const bool MaySupportMemoryTagging = true; @@ -54,15 +94,21 @@ struct AndroidConfig { #if SCUDO_CAN_USE_PRIMARY64 typedef SizeClassAllocator64<AndroidConfig> Primary; static const uptr PrimaryRegionSizeLog = 28U; + typedef u32 PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const bool PrimaryEnableRandomOffset = true; + static const uptr PrimaryMapSizeIncrement = 1UL << 18; #else typedef SizeClassAllocator32<AndroidConfig> Primary; static const uptr PrimaryRegionSizeLog = 18U; + typedef uptr PrimaryCompactPtrT; #endif static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; typedef MapAllocatorCache<AndroidConfig> SecondaryCache; static const u32 SecondaryCacheEntriesArraySize = 256U; + static const u32 SecondaryCacheQuarantineSize = 32U; static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; static const uptr SecondaryCacheDefaultMaxEntrySize = 2UL << 20; static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; @@ -79,15 +125,21 @@ struct AndroidSvelteConfig { #if SCUDO_CAN_USE_PRIMARY64 typedef SizeClassAllocator64<AndroidSvelteConfig> Primary; static const uptr PrimaryRegionSizeLog = 27U; + typedef u32 PrimaryCompactPtrT; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const bool PrimaryEnableRandomOffset = true; + static const uptr PrimaryMapSizeIncrement = 1UL << 18; #else typedef SizeClassAllocator32<AndroidSvelteConfig> Primary; static const uptr PrimaryRegionSizeLog = 16U; + typedef uptr PrimaryCompactPtrT; #endif static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; typedef MapAllocatorCache<AndroidSvelteConfig> SecondaryCache; static const u32 SecondaryCacheEntriesArraySize = 16U; + static const u32 SecondaryCacheQuarantineSize = 32U; static const u32 SecondaryCacheDefaultMaxEntriesCount = 4U; static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 18; static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; @@ -99,11 +151,15 @@ struct AndroidSvelteConfig { #if SCUDO_CAN_USE_PRIMARY64 struct FuchsiaConfig { - using SizeClassMap = 
DefaultSizeClassMap; + using SizeClassMap = FuchsiaSizeClassMap; static const bool MaySupportMemoryTagging = false; typedef SizeClassAllocator64<FuchsiaConfig> Primary; static const uptr PrimaryRegionSizeLog = 30U; + typedef u32 PrimaryCompactPtrT; + static const bool PrimaryEnableRandomOffset = true; + static const uptr PrimaryMapSizeIncrement = 1UL << 18; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; @@ -111,12 +167,34 @@ struct FuchsiaConfig { template <class A> using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs. }; + +struct TrustyConfig { + using SizeClassMap = TrustySizeClassMap; + static const bool MaySupportMemoryTagging = false; + + typedef SizeClassAllocator64<TrustyConfig> Primary; + // Some apps have 1 page of heap total so small regions are necessary. + static const uptr PrimaryRegionSizeLog = 10U; + typedef u32 PrimaryCompactPtrT; + static const bool PrimaryEnableRandomOffset = false; + // Trusty is extremely memory-constrained so minimally round up map calls. + static const uptr PrimaryMapSizeIncrement = 1UL << 4; + static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; + static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; + + typedef MapAllocatorNoCache SecondaryCache; + template <class A> + using TSDRegistryT = TSDRegistrySharedT<A, 1U, 1U>; // Shared, max 1 TSD. +}; #endif #if SCUDO_ANDROID typedef AndroidConfig Config; #elif SCUDO_FUCHSIA typedef FuchsiaConfig Config; +#elif SCUDO_TRUSTY +typedef TrustyConfig Config; #else typedef DefaultConfig Config; #endif diff --git a/compiler-rt/lib/scudo/standalone/bytemap.h b/compiler-rt/lib/scudo/standalone/bytemap.h index e0d54f4e5971..248e096d07b6 100644 --- a/compiler-rt/lib/scudo/standalone/bytemap.h +++ b/compiler-rt/lib/scudo/standalone/bytemap.h @@ -17,10 +17,9 @@ namespace scudo { template <uptr Size> class FlatByteMap { public: - void initLinkerInitialized() {} - void init() { memset(Map, 0, sizeof(Map)); } + void init() { DCHECK(Size == 0 || Map[0] == 0); } - void unmapTestOnly() {} + void unmapTestOnly() { memset(Map, 0, Size); } void set(uptr Index, u8 Value) { DCHECK_LT(Index, Size); @@ -36,7 +35,7 @@ public: void enable() {} private: - u8 Map[Size]; + u8 Map[Size] = {}; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 0df7a652ffa5..fd5360ce0f55 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -28,7 +28,6 @@ #ifdef GWP_ASAN_HOOKS #include "gwp_asan/guarded_pool_allocator.h" #include "gwp_asan/optional/backtrace.h" -#include "gwp_asan/optional/options_parser.h" #include "gwp_asan/optional/segv_handler.h" #endif // GWP_ASAN_HOOKS @@ -52,8 +51,7 @@ public: typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT; void callPostInitCallback() { - static pthread_once_t OnceControl = PTHREAD_ONCE_INIT; - pthread_once(&OnceControl, PostInitCallback); + pthread_once(&PostInitNonce, PostInitCallback); } struct QuarantineCallback { @@ -72,12 +70,10 @@ public: NewHeader.State = Chunk::State::Available; Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header); + if (allocatorSupportsMemoryTagging<Params>()) + Ptr = untagPointer(Ptr); void *BlockBegin = Allocator::getBlockBegin(Ptr, &NewHeader); - const uptr 
ClassId = NewHeader.ClassId; - if (LIKELY(ClassId)) - Cache.deallocate(ClassId, BlockBegin); - else - Allocator.Secondary.deallocate(BlockBegin); + Cache.deallocate(NewHeader.ClassId, BlockBegin); } // We take a shortcut when allocating a quarantine batch by working with the @@ -136,7 +132,7 @@ public: typedef GlobalQuarantine<QuarantineCallback, void> QuarantineT; typedef typename QuarantineT::CacheT QuarantineCacheT; - void initLinkerInitialized() { + void init() { performSanityChecks(); // Check if hardware CRC32 is supported in the binary and by the platform, @@ -170,11 +166,10 @@ public: QuarantineMaxChunkSize = static_cast<u32>(getFlags()->quarantine_max_chunk_size); - Stats.initLinkerInitialized(); + Stats.init(); const s32 ReleaseToOsIntervalMs = getFlags()->release_to_os_interval_ms; - Primary.initLinkerInitialized(ReleaseToOsIntervalMs); - Secondary.initLinkerInitialized(&Stats, ReleaseToOsIntervalMs); - + Primary.init(ReleaseToOsIntervalMs); + Secondary.init(&Stats, ReleaseToOsIntervalMs); Quarantine.init( static_cast<uptr>(getFlags()->quarantine_size_kb << 10), static_cast<uptr>(getFlags()->thread_local_quarantine_size_kb << 10)); @@ -184,12 +179,12 @@ public: // be functional, best called from PostInitCallback. void initGwpAsan() { #ifdef GWP_ASAN_HOOKS - // Bear in mind - Scudo has its own alignment guarantees that are strictly - // enforced. Scudo exposes the same allocation function for everything from - // malloc() to posix_memalign, so in general this flag goes unused, as Scudo - // will always ask GWP-ASan for an aligned amount of bytes. - gwp_asan::options::initOptions(getEnv("GWP_ASAN_OPTIONS"), Printf); - gwp_asan::options::Options Opt = gwp_asan::options::getOptions(); + gwp_asan::options::Options Opt; + Opt.Enabled = getFlags()->GWP_ASAN_Enabled; + Opt.MaxSimultaneousAllocations = + getFlags()->GWP_ASAN_MaxSimultaneousAllocations; + Opt.SampleRate = getFlags()->GWP_ASAN_SampleRate; + Opt.InstallSignalHandlers = getFlags()->GWP_ASAN_InstallSignalHandlers; // Embedded GWP-ASan is locked through the Scudo atfork handler (via // Allocator::disable calling GWPASan.disable). Disable GWP-ASan's atfork // handler. @@ -202,6 +197,11 @@ public: &GuardedAlloc, Printf, gwp_asan::backtrace::getPrintBacktraceFunction(), gwp_asan::backtrace::getSegvBacktraceFunction()); + + GuardedAllocSlotSize = + GuardedAlloc.getAllocatorState()->maximumAllocationSize(); + Stats.add(StatFree, static_cast<uptr>(Opt.MaxSimultaneousAllocations) * + GuardedAllocSlotSize); #endif // GWP_ASAN_HOOKS } @@ -209,11 +209,10 @@ public: TSDRegistry.initThreadMaybe(this, MinimalInit); } - void reset() { memset(this, 0, sizeof(*this)); } - void unmapTestOnly() { - TSDRegistry.unmapTestOnly(); + TSDRegistry.unmapTestOnly(this); Primary.unmapTestOnly(); + Secondary.unmapTestOnly(); #ifdef GWP_ASAN_HOOKS if (getFlags()->GWP_ASAN_InstallSignalHandlers) gwp_asan::segv_handler::uninstallSignalHandlers(); @@ -224,9 +223,7 @@ public: TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } // The Cache must be provided zero-initialized. 
- void initCache(CacheT *Cache) { - Cache->initLinkerInitialized(&Stats, &Primary); - } + void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); } // Release the resources used by a TSD, which involves: // - draining the local quarantine cache to the global quarantine; @@ -239,11 +236,26 @@ public: TSD->Cache.destroy(&Stats); } - ALWAYS_INLINE void *untagPointerMaybe(void *Ptr) { - if (allocatorSupportsMemoryTagging<Params>()) - return reinterpret_cast<void *>( - untagPointer(reinterpret_cast<uptr>(Ptr))); - return Ptr; + ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) { + if (!allocatorSupportsMemoryTagging<Params>()) + return Ptr; + auto UntaggedPtr = untagPointer(Ptr); + if (UntaggedPtr != Ptr) + return UntaggedPtr; + // Secondary, or pointer allocated while memory tagging is unsupported or + // disabled. The tag mismatch is okay in the latter case because tags will + // not be checked. + return addHeaderTag(Ptr); + } + + ALWAYS_INLINE uptr addHeaderTag(uptr Ptr) { + if (!allocatorSupportsMemoryTagging<Params>()) + return Ptr; + return addFixedTag(Ptr, 2); + } + + ALWAYS_INLINE void *addHeaderTag(void *Ptr) { + return reinterpret_cast<void *>(addHeaderTag(reinterpret_cast<uptr>(Ptr))); } NOINLINE u32 collectStackTrace() { @@ -260,7 +272,8 @@ public: #endif } - uptr computeOddEvenMaskForPointerMaybe(Options Options, uptr Ptr, uptr Size) { + uptr computeOddEvenMaskForPointerMaybe(Options Options, uptr Ptr, + uptr ClassId) { if (!Options.get(OptionBit::UseOddEvenTags)) return 0; @@ -269,8 +282,7 @@ public: // Size to Ptr will flip the least significant set bit of Size in Ptr, so // that bit will have the pattern 010101... for consecutive blocks, which we // can use to determine which tag mask to use. - return (Ptr & (1ULL << getLeastSignificantSetBitIndex(Size))) ? 0xaaaa - : 0x5555; + return 0x5555U << ((Ptr >> SizeClassMap::getSizeLSBByClassId(ClassId)) & 1); } NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, @@ -278,27 +290,34 @@ public: bool ZeroContents = false) { initThreadMaybe(); + const Options Options = Primary.Options.load(); + if (UNLIKELY(Alignment > MaxAlignment)) { + if (Options.get(OptionBit::MayReturnNull)) + return nullptr; + reportAlignmentTooBig(Alignment, MaxAlignment); + } + if (Alignment < MinAlignment) + Alignment = MinAlignment; + #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.shouldSample())) { - if (void *Ptr = GuardedAlloc.allocate(roundUpTo(Size, Alignment))) + if (void *Ptr = GuardedAlloc.allocate(Size, Alignment)) { + if (UNLIKELY(&__scudo_allocate_hook)) + __scudo_allocate_hook(Ptr, Size); + Stats.lock(); + Stats.add(StatAllocated, GuardedAllocSlotSize); + Stats.sub(StatFree, GuardedAllocSlotSize); + Stats.unlock(); return Ptr; + } } #endif // GWP_ASAN_HOOKS - const Options Options = Primary.Options.load(); const FillContentsMode FillContents = ZeroContents ? ZeroFill : TSDRegistry.getDisableMemInit() ? NoFill : Options.getFillContentsMode(); - if (UNLIKELY(Alignment > MaxAlignment)) { - if (Options.get(OptionBit::MayReturnNull)) - return nullptr; - reportAlignmentTooBig(Alignment, MaxAlignment); - } - if (Alignment < MinAlignment) - Alignment = MinAlignment; - // If the requested size happens to be 0 (more common than you might think), // allocate MinAlignment bytes on top of the header. 
Then add the extra // bytes required to fulfill the alignment requirements: we allocate enough @@ -340,7 +359,7 @@ public: TSD->unlock(); } if (UNLIKELY(ClassId == 0)) - Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd, + Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd, FillContents); if (UNLIKELY(!Block)) { @@ -414,7 +433,7 @@ public: if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) PrevEnd = NextPage; TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr); - resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd); + resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, Size, BlockEnd); if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) { // If an allocation needs to be zeroed (i.e. calloc) we can normally // avoid zeroing the memory now since we can rely on memory having @@ -432,15 +451,26 @@ public: } } else { const uptr OddEvenMask = - computeOddEvenMaskForPointerMaybe(Options, BlockUptr, BlockSize); + computeOddEvenMaskForPointerMaybe(Options, BlockUptr, ClassId); TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd); } - storeAllocationStackMaybe(Options, Ptr); - } else if (UNLIKELY(FillContents != NoFill)) { - // This condition is not necessarily unlikely, but since memset is - // costly, we might as well mark it as such. - memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, - PrimaryT::getSizeByClassId(ClassId)); + storePrimaryAllocationStackMaybe(Options, Ptr); + } else { + Block = addHeaderTag(Block); + Ptr = addHeaderTag(Ptr); + if (UNLIKELY(FillContents != NoFill)) { + // This condition is not necessarily unlikely, but since memset is + // costly, we might as well mark it as such. + memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, + PrimaryT::getSizeByClassId(ClassId)); + } + } + } else { + Block = addHeaderTag(Block); + Ptr = addHeaderTag(Ptr); + if (UNLIKELY(useMemoryTagging<Params>(Options))) { + storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr)); + storeSecondaryAllocationStackMaybe(Options, Ptr, Size); } } @@ -480,22 +510,28 @@ public: // being destroyed properly. Any other heap operation will do a full init. initThreadMaybe(/*MinimalInit=*/true); + if (UNLIKELY(&__scudo_deallocate_hook)) + __scudo_deallocate_hook(Ptr); + + if (UNLIKELY(!Ptr)) + return; + #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) { GuardedAlloc.deallocate(Ptr); + Stats.lock(); + Stats.add(StatFree, GuardedAllocSlotSize); + Stats.sub(StatAllocated, GuardedAllocSlotSize); + Stats.unlock(); return; } #endif // GWP_ASAN_HOOKS - if (UNLIKELY(&__scudo_deallocate_hook)) - __scudo_deallocate_hook(Ptr); - - if (UNLIKELY(!Ptr)) - return; if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Deallocating, Ptr); - Ptr = untagPointerMaybe(Ptr); + void *TaggedPtr = Ptr; + Ptr = getHeaderTaggedPointer(Ptr); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); @@ -520,7 +556,7 @@ public: reportDeleteSizeMismatch(Ptr, DeleteSize, Size); } - quarantineOrDeallocateChunk(Options, Ptr, &Header, Size); + quarantineOrDeallocateChunk(Options, TaggedPtr, &Header, Size); } void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) { @@ -533,9 +569,6 @@ public: reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize); } - void *OldTaggedPtr = OldPtr; - OldPtr = untagPointerMaybe(OldPtr); - // The following cases are handled by the C wrappers. 
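// Illustrative aside: scudo's C wrappers (wrappers_c.inc) route
// realloc(nullptr, Size) to malloc(Size) and realloc(Ptr, 0) to free(Ptr)
// returning nullptr, which is why both conditions can be DCHECKed here.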
DCHECK_NE(OldPtr, nullptr); DCHECK_NE(NewSize, 0); @@ -547,10 +580,17 @@ public: if (NewPtr) memcpy(NewPtr, OldPtr, (NewSize < OldSize) ? NewSize : OldSize); GuardedAlloc.deallocate(OldPtr); + Stats.lock(); + Stats.add(StatFree, GuardedAllocSlotSize); + Stats.sub(StatAllocated, GuardedAllocSlotSize); + Stats.unlock(); return NewPtr; } #endif // GWP_ASAN_HOOKS + void *OldTaggedPtr = OldPtr; + OldPtr = getHeaderTaggedPointer(OldPtr); + if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(OldPtr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Reallocating, OldPtr); @@ -570,7 +610,7 @@ public: Chunk::Origin::Malloc); } - void *BlockBegin = getBlockBegin(OldPtr, &OldHeader); + void *BlockBegin = getBlockBegin(OldTaggedPtr, &OldHeader); uptr BlockEnd; uptr OldSize; const uptr ClassId = OldHeader.ClassId; @@ -580,25 +620,30 @@ public: OldSize = OldHeader.SizeOrUnusedBytes; } else { BlockEnd = SecondaryT::getBlockEnd(BlockBegin); - OldSize = BlockEnd - - (reinterpret_cast<uptr>(OldPtr) + OldHeader.SizeOrUnusedBytes); + OldSize = BlockEnd - (reinterpret_cast<uptr>(OldTaggedPtr) + + OldHeader.SizeOrUnusedBytes); } // If the new chunk still fits in the previously allocated block (with a // reasonable delta), we just keep the old block, and update the chunk // header to reflect the size change. - if (reinterpret_cast<uptr>(OldPtr) + NewSize <= BlockEnd) { + if (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize <= BlockEnd) { if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) { Chunk::UnpackedHeader NewHeader = OldHeader; NewHeader.SizeOrUnusedBytes = (ClassId ? NewSize - : BlockEnd - (reinterpret_cast<uptr>(OldPtr) + NewSize)) & + : BlockEnd - + (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(ClassId && useMemoryTagging<Params>(Options))) { - resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize, - reinterpret_cast<uptr>(OldTaggedPtr) + NewSize, - BlockEnd); - storeAllocationStackMaybe(Options, OldPtr); + if (UNLIKELY(useMemoryTagging<Params>(Options))) { + if (ClassId) { + resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize, + reinterpret_cast<uptr>(OldTaggedPtr) + NewSize, + NewSize, untagPointer(BlockEnd)); + storePrimaryAllocationStackMaybe(Options, OldPtr); + } else { + storeSecondaryAllocationStackMaybe(Options, OldPtr, NewSize); + } } return OldTaggedPtr; } @@ -611,7 +656,7 @@ public: void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); if (LIKELY(NewPtr)) { memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); - quarantineOrDeallocateChunk(Options, OldPtr, &OldHeader, OldSize); + quarantineOrDeallocateChunk(Options, OldTaggedPtr, &OldHeader, OldSize); } return NewPtr; } @@ -649,7 +694,7 @@ public: // function. This can be called with a null buffer or zero size for buffer // sizing purposes. 
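// A hedged usage sketch (Allocator stands for an instance of this class):
// the return value is the required length even when the buffer is null,
// enabling a two-call sizing pattern:
//   const uptr Needed = Allocator.getStats(nullptr, 0);
//   char *Buf = static_cast<char *>(malloc(Needed));
//   Allocator.getStats(Buf, Needed);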
uptr getStats(char *Buffer, uptr Size) { - ScopedString Str(1024); + ScopedString Str; disable(); const uptr Length = getStats(&Str) + 1; enable(); @@ -663,7 +708,7 @@ public: } void printStats() { - ScopedString Str(1024); + ScopedString Str; disable(); getStats(&Str); enable(); @@ -682,16 +727,34 @@ public: void iterateOverChunks(uptr Base, uptr Size, iterate_callback Callback, void *Arg) { initThreadMaybe(); + if (archSupportsMemoryTagging()) + Base = untagPointer(Base); const uptr From = Base; const uptr To = Base + Size; - auto Lambda = [this, From, To, Callback, Arg](uptr Block) { + bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging<Params>() && + systemSupportsMemoryTagging(); + auto Lambda = [this, From, To, MayHaveTaggedPrimary, Callback, + Arg](uptr Block) { if (Block < From || Block >= To) return; uptr Chunk; Chunk::UnpackedHeader Header; - if (getChunkFromBlock(Block, &Chunk, &Header) && - Header.State == Chunk::State::Allocated) { + if (MayHaveTaggedPrimary) { + // A chunk header can either have a zero tag (tagged primary) or the + // header tag (secondary, or untagged primary). We don't know which so + // try both. + ScopedDisableMemoryTagChecks x; + if (!getChunkFromBlock(Block, &Chunk, &Header) && + !getChunkFromBlock(addHeaderTag(Block), &Chunk, &Header)) + return; + } else { + if (!getChunkFromBlock(addHeaderTag(Block), &Chunk, &Header)) + return; + } + if (Header.State == Chunk::State::Allocated) { uptr TaggedChunk = Chunk; + if (allocatorSupportsMemoryTagging<Params>()) + TaggedChunk = untagPointer(TaggedChunk); if (useMemoryTagging<Params>(Primary.Options.load())) TaggedChunk = loadTag(Chunk); Callback(TaggedChunk, getSize(reinterpret_cast<void *>(Chunk), &Header), @@ -752,7 +815,7 @@ public: return GuardedAlloc.getSize(Ptr); #endif // GWP_ASAN_HOOKS - Ptr = untagPointerMaybe(const_cast<void *>(Ptr)); + Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); // Getting the usable size of a chunk only makes sense if it's allocated. @@ -777,7 +840,7 @@ public: #endif // GWP_ASAN_HOOKS if (!Ptr || !isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment)) return false; - Ptr = untagPointerMaybe(const_cast<void *>(Ptr)); + Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr)); Chunk::UnpackedHeader Header; return Chunk::isValid(Cookie, Ptr, &Header) && Header.State == Chunk::State::Allocated; @@ -787,8 +850,17 @@ public: return useMemoryTagging<Params>(Primary.Options.load()); } void disableMemoryTagging() { - if (allocatorSupportsMemoryTagging<Params>()) + // If we haven't been initialized yet, we need to initialize now in order to + // prevent a future call to initThreadMaybe() from enabling memory tagging + // based on feature detection. But don't call initThreadMaybe() because it + // may end up calling the allocator (via pthread_atfork, via the post-init + // callback), which may cause mappings to be created with memory tagging + // enabled. 
+ TSDRegistry.initOnceMaybe(this); + if (allocatorSupportsMemoryTagging<Params>()) { + Secondary.disableMemoryTagging(); Primary.Options.clear(OptionBit::UseMemoryTagging); + } } void setTrackAllocationStacks(bool Track) { @@ -804,6 +876,14 @@ public: Primary.Options.setFillContentsMode(FillContents); } + void setAddLargeAllocationSlack(bool AddSlack) { + initThreadMaybe(); + if (AddSlack) + Primary.Options.set(OptionBit::AddLargeAllocationSlack); + else + Primary.Options.clear(OptionBit::AddLargeAllocationSlack); + } + const char *getStackDepotAddress() const { return reinterpret_cast<const char *>(&Depot); } @@ -816,116 +896,54 @@ public: return PrimaryT::getRegionInfoArraySize(); } + const char *getRingBufferAddress() const { + return reinterpret_cast<const char *>(&RingBuffer); + } + + static uptr getRingBufferSize() { return sizeof(RingBuffer); } + + static const uptr MaxTraceSize = 64; + + static void collectTraceMaybe(const StackDepot *Depot, + uintptr_t (&Trace)[MaxTraceSize], u32 Hash) { + uptr RingPos, Size; + if (!Depot->find(Hash, &RingPos, &Size)) + return; + for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) + Trace[I] = (*Depot)[RingPos + I]; + } + static void getErrorInfo(struct scudo_error_info *ErrorInfo, uintptr_t FaultAddr, const char *DepotPtr, - const char *RegionInfoPtr, const char *Memory, - const char *MemoryTags, uintptr_t MemoryAddr, - size_t MemorySize) { + const char *RegionInfoPtr, const char *RingBufferPtr, + const char *Memory, const char *MemoryTags, + uintptr_t MemoryAddr, size_t MemorySize) { *ErrorInfo = {}; if (!allocatorSupportsMemoryTagging<Params>() || MemoryAddr + MemorySize < MemoryAddr) return; - uptr UntaggedFaultAddr = untagPointer(FaultAddr); - u8 FaultAddrTag = extractTag(FaultAddr); - BlockInfo Info = - PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr); - - auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool { - if (Addr < MemoryAddr || Addr + archMemoryTagGranuleSize() < Addr || - Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize) - return false; - *Data = &Memory[Addr - MemoryAddr]; - *Tag = static_cast<u8>( - MemoryTags[(Addr - MemoryAddr) / archMemoryTagGranuleSize()]); - return true; - }; - - auto ReadBlock = [&](uptr Addr, uptr *ChunkAddr, - Chunk::UnpackedHeader *Header, const u32 **Data, - u8 *Tag) { - const char *BlockBegin; - u8 BlockBeginTag; - if (!GetGranule(Addr, &BlockBegin, &BlockBeginTag)) - return false; - uptr ChunkOffset = getChunkOffsetFromBlock(BlockBegin); - *ChunkAddr = Addr + ChunkOffset; - - const char *ChunkBegin; - if (!GetGranule(*ChunkAddr, &ChunkBegin, Tag)) - return false; - *Header = *reinterpret_cast<const Chunk::UnpackedHeader *>( - ChunkBegin - Chunk::getHeaderSize()); - *Data = reinterpret_cast<const u32 *>(ChunkBegin); - return true; - }; - auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr); - - auto MaybeCollectTrace = [&](uintptr_t(&Trace)[MaxTraceSize], u32 Hash) { - uptr RingPos, Size; - if (!Depot->find(Hash, &RingPos, &Size)) - return; - for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) - Trace[I] = (*Depot)[RingPos + I]; - }; - size_t NextErrorReport = 0; - // First, check for UAF. 
- { - uptr ChunkAddr; - Chunk::UnpackedHeader Header; - const u32 *Data; - uint8_t Tag; - if (ReadBlock(Info.BlockBegin, &ChunkAddr, &Header, &Data, &Tag) && - Header.State != Chunk::State::Allocated && - Data[MemTagPrevTagIndex] == FaultAddrTag) { - auto *R = &ErrorInfo->reports[NextErrorReport++]; - R->error_type = USE_AFTER_FREE; - R->allocation_address = ChunkAddr; - R->allocation_size = Header.SizeOrUnusedBytes; - MaybeCollectTrace(R->allocation_trace, - Data[MemTagAllocationTraceIndex]); - R->allocation_tid = Data[MemTagAllocationTidIndex]; - MaybeCollectTrace(R->deallocation_trace, - Data[MemTagDeallocationTraceIndex]); - R->deallocation_tid = Data[MemTagDeallocationTidIndex]; - } - } - - auto CheckOOB = [&](uptr BlockAddr) { - if (BlockAddr < Info.RegionBegin || BlockAddr >= Info.RegionEnd) - return false; - - uptr ChunkAddr; - Chunk::UnpackedHeader Header; - const u32 *Data; - uint8_t Tag; - if (!ReadBlock(BlockAddr, &ChunkAddr, &Header, &Data, &Tag) || - Header.State != Chunk::State::Allocated || Tag != FaultAddrTag) - return false; - - auto *R = &ErrorInfo->reports[NextErrorReport++]; - R->error_type = - UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW; - R->allocation_address = ChunkAddr; - R->allocation_size = Header.SizeOrUnusedBytes; - MaybeCollectTrace(R->allocation_trace, Data[MemTagAllocationTraceIndex]); - R->allocation_tid = Data[MemTagAllocationTidIndex]; - return NextErrorReport == - sizeof(ErrorInfo->reports) / sizeof(ErrorInfo->reports[0]); - }; - - if (CheckOOB(Info.BlockBegin)) - return; - - // Check for OOB in the 30 surrounding blocks. Beyond that we are likely to - // hit false positives. - for (int I = 1; I != 16; ++I) - if (CheckOOB(Info.BlockBegin + I * Info.BlockSize) || - CheckOOB(Info.BlockBegin - I * Info.BlockSize)) - return; + // Check for OOB in the current block and the two surrounding blocks. Beyond + // that, UAF is more likely. + if (extractTag(FaultAddr) != 0) + getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, + RegionInfoPtr, Memory, MemoryTags, MemoryAddr, + MemorySize, 0, 2); + + // Check the ring buffer. For primary allocations this will only find UAF; + // for secondary allocations we can find either UAF or OOB. + getRingBufferErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, + RingBufferPtr); + + // Check for OOB in the 28 blocks surrounding the 3 we checked earlier. + // Beyond that we are likely to hit false positives. + if (extractTag(FaultAddr) != 0) + getInlineErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, + RegionInfoPtr, Memory, MemoryTags, MemoryAddr, + MemorySize, 2, 16); } private: @@ -949,39 +967,51 @@ private: // These are indexes into an "array" of 32-bit values that store information // inline with a chunk that is relevant to diagnosing memory tag faults, where - // 0 corresponds to the address of the user memory. This means that negative - // indexes may be used to store information about allocations, while positive - // indexes may only be used to store information about deallocations, because - // the user memory is in use until it has been deallocated. The smallest index - // that may be used is -2, which corresponds to 8 bytes before the user - // memory, because the chunk header size is 8 bytes and in allocators that - // support memory tagging the minimum alignment is at least the tag granule - // size (16 on aarch64), and the largest index that may be used is 3 because - // we are only guaranteed to have at least a granule's worth of space in the - // user memory. 
+ // 0 corresponds to the address of the user memory. This means that only + // negative indexes may be used. The smallest index that may be used is -2, + // which corresponds to 8 bytes before the user memory, because the chunk + // header size is 8 bytes and in allocators that support memory tagging the + // minimum alignment is at least the tag granule size (16 on aarch64). static const sptr MemTagAllocationTraceIndex = -2; static const sptr MemTagAllocationTidIndex = -1; - static const sptr MemTagDeallocationTraceIndex = 0; - static const sptr MemTagDeallocationTidIndex = 1; - static const sptr MemTagPrevTagIndex = 2; - - static const uptr MaxTraceSize = 64; - u32 Cookie; - u32 QuarantineMaxChunkSize; + u32 Cookie = 0; + u32 QuarantineMaxChunkSize = 0; GlobalStats Stats; PrimaryT Primary; SecondaryT Secondary; QuarantineT Quarantine; TSDRegistryT TSDRegistry; + pthread_once_t PostInitNonce = PTHREAD_ONCE_INIT; #ifdef GWP_ASAN_HOOKS gwp_asan::GuardedPoolAllocator GuardedAlloc; + uptr GuardedAllocSlotSize = 0; #endif // GWP_ASAN_HOOKS StackDepot Depot; + struct AllocationRingBuffer { + struct Entry { + atomic_uptr Ptr; + atomic_uptr AllocationSize; + atomic_u32 AllocationTrace; + atomic_u32 AllocationTid; + atomic_u32 DeallocationTrace; + atomic_u32 DeallocationTid; + }; + + atomic_uptr Pos; +#ifdef SCUDO_FUZZ + static const uptr NumEntries = 2; +#else + static const uptr NumEntries = 32768; +#endif + Entry Entries[NumEntries]; + }; + AllocationRingBuffer RingBuffer = {}; + // The following might get optimized out by the compiler. NOINLINE void performSanityChecks() { // Verify that the header offset field can hold the maximum offset. In the @@ -1029,36 +1059,50 @@ private: const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes; if (LIKELY(Header->ClassId)) return SizeOrUnusedBytes; + if (allocatorSupportsMemoryTagging<Params>()) + Ptr = untagPointer(const_cast<void *>(Ptr)); return SecondaryT::getBlockEnd(getBlockBegin(Ptr, Header)) - reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes; } - void quarantineOrDeallocateChunk(Options Options, void *Ptr, + void quarantineOrDeallocateChunk(Options Options, void *TaggedPtr, Chunk::UnpackedHeader *Header, uptr Size) { + void *Ptr = getHeaderTaggedPointer(TaggedPtr); Chunk::UnpackedHeader NewHeader = *Header; - if (UNLIKELY(NewHeader.ClassId && useMemoryTagging<Params>(Options))) { - u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr))); - if (!TSDRegistry.getDisableMemInit()) { - uptr TaggedBegin, TaggedEnd; - const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( - Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)), - SizeClassMap::getSizeByClassId(NewHeader.ClassId)); - // Exclude the previous tag so that immediate use after free is detected - // 100% of the time. - setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, - &TaggedEnd); - } - NewHeader.OriginOrWasZeroed = !TSDRegistry.getDisableMemInit(); - storeDeallocationStackMaybe(Options, Ptr, PrevTag); - } // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. // This purposefully underflows for Size == 0. 
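// A worked instance of the underflow: with Size == 0, the unsigned
// expression (Size - 1) wraps to the maximum uptr value, which is
// necessarily >= QuarantineMaxChunkSize, so zero-sized chunks always take
// the bypass path computed just below.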
- const bool BypassQuarantine = - !Quarantine.getCacheSize() || ((Size - 1) >= QuarantineMaxChunkSize); - if (BypassQuarantine) { + const bool BypassQuarantine = !Quarantine.getCacheSize() || + ((Size - 1) >= QuarantineMaxChunkSize) || + !NewHeader.ClassId; + if (BypassQuarantine) NewHeader.State = Chunk::State::Available; - Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); + else + NewHeader.State = Chunk::State::Quarantined; + NewHeader.OriginOrWasZeroed = useMemoryTagging<Params>(Options) && + NewHeader.ClassId && + !TSDRegistry.getDisableMemInit(); + Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); + + if (UNLIKELY(useMemoryTagging<Params>(Options))) { + u8 PrevTag = extractTag(reinterpret_cast<uptr>(TaggedPtr)); + storeDeallocationStackMaybe(Options, Ptr, PrevTag, Size); + if (NewHeader.ClassId) { + if (!TSDRegistry.getDisableMemInit()) { + uptr TaggedBegin, TaggedEnd; + const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( + Options, reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)), + NewHeader.ClassId); + // Exclude the previous tag so that immediate use after free is + // detected 100% of the time. + setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, + &TaggedEnd); + } + } + } + if (BypassQuarantine) { + if (allocatorSupportsMemoryTagging<Params>()) + Ptr = untagPointer(Ptr); void *BlockBegin = getBlockBegin(Ptr, &NewHeader); const uptr ClassId = NewHeader.ClassId; if (LIKELY(ClassId)) { @@ -1068,11 +1112,12 @@ private: if (UnlockRequired) TSD->unlock(); } else { - Secondary.deallocate(BlockBegin); + if (UNLIKELY(useMemoryTagging<Params>(Options))) + storeTags(reinterpret_cast<uptr>(BlockBegin), + reinterpret_cast<uptr>(Ptr)); + Secondary.deallocate(Options, BlockBegin); } } else { - NewHeader.State = Chunk::State::Quarantined; - Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); Quarantine.put(&TSD->QuarantineCache, @@ -1096,7 +1141,62 @@ private: return Offset + Chunk::getHeaderSize(); } - void storeAllocationStackMaybe(Options Options, void *Ptr) { + // Set the tag of the granule past the end of the allocation to 0, to catch + // linear overflows even if a previous larger allocation used the same block + // and tag. Only do this if the granule past the end is in our block, because + // this would otherwise lead to a SEGV if the allocation covers the entire + // block and our block is at the end of a mapping. The tag of the next block's + // header granule will be set to 0, so it will serve the purpose of catching + // linear overflows in this case. + // + // For allocations of size 0 we do not end up storing the address tag to the + // memory tag space, which getInlineErrorInfo() normally relies on to match + // address tags against chunks. To allow matching in this case we store the + // address tag in the first byte of the chunk. + void storeEndMarker(uptr End, uptr Size, uptr BlockEnd) { + DCHECK_EQ(BlockEnd, untagPointer(BlockEnd)); + uptr UntaggedEnd = untagPointer(End); + if (UntaggedEnd != BlockEnd) { + storeTag(UntaggedEnd); + if (Size == 0) + *reinterpret_cast<u8 *>(UntaggedEnd) = extractTag(End); + } + } + + void *prepareTaggedChunk(void *Ptr, uptr Size, uptr ExcludeMask, + uptr BlockEnd) { + // Prepare the granule before the chunk to store the chunk header by setting + // its tag to 0. 
Normally its tag will already be 0, but in the case where a + // chunk holding a low alignment allocation is reused for a higher alignment + // allocation, the chunk may already have a non-zero tag from the previous + // allocation. + storeTag(reinterpret_cast<uptr>(Ptr) - archMemoryTagGranuleSize()); + + uptr TaggedBegin, TaggedEnd; + setRandomTag(Ptr, Size, ExcludeMask, &TaggedBegin, &TaggedEnd); + + storeEndMarker(TaggedEnd, Size, BlockEnd); + return reinterpret_cast<void *>(TaggedBegin); + } + + void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr NewSize, + uptr BlockEnd) { + uptr RoundOldPtr = roundUpTo(OldPtr, archMemoryTagGranuleSize()); + uptr RoundNewPtr; + if (RoundOldPtr >= NewPtr) { + // If the allocation is shrinking we just need to set the tag past the end + // of the allocation to 0. See explanation in storeEndMarker() above. + RoundNewPtr = roundUpTo(NewPtr, archMemoryTagGranuleSize()); + } else { + // Set the memory tag of the region + // [RoundOldPtr, roundUpTo(NewPtr, archMemoryTagGranuleSize())) + // to the pointer tag stored in OldPtr. + RoundNewPtr = storeTags(RoundOldPtr, NewPtr); + } + storeEndMarker(RoundNewPtr, NewSize, BlockEnd); + } + + void storePrimaryAllocationStackMaybe(Options Options, void *Ptr) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); @@ -1104,18 +1204,217 @@ private: Ptr32[MemTagAllocationTidIndex] = getThreadID(); } - void storeDeallocationStackMaybe(Options Options, void *Ptr, - uint8_t PrevTag) { + void storeRingBufferEntry(void *Ptr, u32 AllocationTrace, u32 AllocationTid, + uptr AllocationSize, u32 DeallocationTrace, + u32 DeallocationTid) { + uptr Pos = atomic_fetch_add(&RingBuffer.Pos, 1, memory_order_relaxed); + typename AllocationRingBuffer::Entry *Entry = + &RingBuffer.Entries[Pos % AllocationRingBuffer::NumEntries]; + + // First invalidate our entry so that we don't attempt to interpret a + // partially written state in getRingBufferErrorInfo(). The fences below + // ensure that the compiler does not move the stores to Ptr in between the + // stores to the other fields. + atomic_store_relaxed(&Entry->Ptr, 0); + + __atomic_signal_fence(__ATOMIC_SEQ_CST); + atomic_store_relaxed(&Entry->AllocationTrace, AllocationTrace); + atomic_store_relaxed(&Entry->AllocationTid, AllocationTid); + atomic_store_relaxed(&Entry->AllocationSize, AllocationSize); + atomic_store_relaxed(&Entry->DeallocationTrace, DeallocationTrace); + atomic_store_relaxed(&Entry->DeallocationTid, DeallocationTid); + __atomic_signal_fence(__ATOMIC_SEQ_CST); + + atomic_store_relaxed(&Entry->Ptr, reinterpret_cast<uptr>(Ptr)); + } + + void storeSecondaryAllocationStackMaybe(Options Options, void *Ptr, + uptr Size) { + if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) + return; + + u32 Trace = collectStackTrace(); + u32 Tid = getThreadID(); + + auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); + Ptr32[MemTagAllocationTraceIndex] = Trace; + Ptr32[MemTagAllocationTidIndex] = Tid; + + storeRingBufferEntry(untagPointer(Ptr), Trace, Tid, Size, 0, 0); + } + + void storeDeallocationStackMaybe(Options Options, void *Ptr, u8 PrevTag, + uptr Size) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; - // Disable tag checks here so that we don't need to worry about zero sized // allocations.
- ScopedDisableMemoryTagChecks x; auto *Ptr32 = reinterpret_cast<u32 *>(Ptr); - Ptr32[MemTagDeallocationTraceIndex] = collectStackTrace(); - Ptr32[MemTagDeallocationTidIndex] = getThreadID(); - Ptr32[MemTagPrevTagIndex] = PrevTag; + u32 AllocationTrace = Ptr32[MemTagAllocationTraceIndex]; + u32 AllocationTid = Ptr32[MemTagAllocationTidIndex]; + + u32 DeallocationTrace = collectStackTrace(); + u32 DeallocationTid = getThreadID(); + + storeRingBufferEntry(addFixedTag(untagPointer(Ptr), PrevTag), + AllocationTrace, AllocationTid, Size, + DeallocationTrace, DeallocationTid); + } + + static const size_t NumErrorReports = + sizeof(((scudo_error_info *)0)->reports) / + sizeof(((scudo_error_info *)0)->reports[0]); + + static void getInlineErrorInfo(struct scudo_error_info *ErrorInfo, + size_t &NextErrorReport, uintptr_t FaultAddr, + const StackDepot *Depot, + const char *RegionInfoPtr, const char *Memory, + const char *MemoryTags, uintptr_t MemoryAddr, + size_t MemorySize, size_t MinDistance, + size_t MaxDistance) { + uptr UntaggedFaultAddr = untagPointer(FaultAddr); + u8 FaultAddrTag = extractTag(FaultAddr); + BlockInfo Info = + PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr); + + auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool { + if (Addr < MemoryAddr || Addr + archMemoryTagGranuleSize() < Addr || + Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize) + return false; + *Data = &Memory[Addr - MemoryAddr]; + *Tag = static_cast<u8>( + MemoryTags[(Addr - MemoryAddr) / archMemoryTagGranuleSize()]); + return true; + }; + + auto ReadBlock = [&](uptr Addr, uptr *ChunkAddr, + Chunk::UnpackedHeader *Header, const u32 **Data, + u8 *Tag) { + const char *BlockBegin; + u8 BlockBeginTag; + if (!GetGranule(Addr, &BlockBegin, &BlockBeginTag)) + return false; + uptr ChunkOffset = getChunkOffsetFromBlock(BlockBegin); + *ChunkAddr = Addr + ChunkOffset; + + const char *ChunkBegin; + if (!GetGranule(*ChunkAddr, &ChunkBegin, Tag)) + return false; + *Header = *reinterpret_cast<const Chunk::UnpackedHeader *>( + ChunkBegin - Chunk::getHeaderSize()); + *Data = reinterpret_cast<const u32 *>(ChunkBegin); + + // Allocations of size 0 will have stashed the tag in the first byte of + // the chunk, see storeEndMarker(). + if (Header->SizeOrUnusedBytes == 0) + *Tag = static_cast<u8>(*ChunkBegin); + + return true; + }; + + if (NextErrorReport == NumErrorReports) + return; + + auto CheckOOB = [&](uptr BlockAddr) { + if (BlockAddr < Info.RegionBegin || BlockAddr >= Info.RegionEnd) + return false; + + uptr ChunkAddr; + Chunk::UnpackedHeader Header; + const u32 *Data; + uint8_t Tag; + if (!ReadBlock(BlockAddr, &ChunkAddr, &Header, &Data, &Tag) || + Header.State != Chunk::State::Allocated || Tag != FaultAddrTag) + return false; + + auto *R = &ErrorInfo->reports[NextErrorReport++]; + R->error_type = + UntaggedFaultAddr < ChunkAddr ? 
BUFFER_UNDERFLOW : BUFFER_OVERFLOW; + R->allocation_address = ChunkAddr; + R->allocation_size = Header.SizeOrUnusedBytes; + collectTraceMaybe(Depot, R->allocation_trace, + Data[MemTagAllocationTraceIndex]); + R->allocation_tid = Data[MemTagAllocationTidIndex]; + return NextErrorReport == NumErrorReports; + }; + + if (MinDistance == 0 && CheckOOB(Info.BlockBegin)) + return; + + for (size_t I = Max<size_t>(MinDistance, 1); I != MaxDistance; ++I) + if (CheckOOB(Info.BlockBegin + I * Info.BlockSize) || + CheckOOB(Info.BlockBegin - I * Info.BlockSize)) + return; + } + + static void getRingBufferErrorInfo(struct scudo_error_info *ErrorInfo, + size_t &NextErrorReport, + uintptr_t FaultAddr, + const StackDepot *Depot, + const char *RingBufferPtr) { + auto *RingBuffer = + reinterpret_cast<const AllocationRingBuffer *>(RingBufferPtr); + uptr Pos = atomic_load_relaxed(&RingBuffer->Pos); + + for (uptr I = Pos - 1; I != Pos - 1 - AllocationRingBuffer::NumEntries && + NextErrorReport != NumErrorReports; + --I) { + auto *Entry = &RingBuffer->Entries[I % AllocationRingBuffer::NumEntries]; + uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr); + if (!EntryPtr) + continue; + + uptr UntaggedEntryPtr = untagPointer(EntryPtr); + uptr EntrySize = atomic_load_relaxed(&Entry->AllocationSize); + u32 AllocationTrace = atomic_load_relaxed(&Entry->AllocationTrace); + u32 AllocationTid = atomic_load_relaxed(&Entry->AllocationTid); + u32 DeallocationTrace = atomic_load_relaxed(&Entry->DeallocationTrace); + u32 DeallocationTid = atomic_load_relaxed(&Entry->DeallocationTid); + + if (DeallocationTid) { + // For UAF we only consider in-bounds fault addresses because + // out-of-bounds UAF is rare and attempting to detect it is very likely + // to result in false positives. + if (FaultAddr < EntryPtr || FaultAddr >= EntryPtr + EntrySize) + continue; + } else { + // Ring buffer OOB is only possible with secondary allocations. In this + // case we are guaranteed a guard region of at least a page on either + // side of the allocation (guard page on the right, guard page + tagged + // region on the left), so ignore any faults outside of that range. + if (FaultAddr < EntryPtr - getPageSizeCached() || + FaultAddr >= EntryPtr + EntrySize + getPageSizeCached()) + continue; + + // For UAF the ring buffer will contain two entries, one for the + // allocation and another for the deallocation. Don't report buffer + // overflow/underflow using the allocation entry if we have already + // collected a report from the deallocation entry. 
+ bool Found = false; + for (uptr J = 0; J != NextErrorReport; ++J) { + if (ErrorInfo->reports[J].allocation_address == UntaggedEntryPtr) { + Found = true; + break; + } + } + if (Found) + continue; + } + + auto *R = &ErrorInfo->reports[NextErrorReport++]; + if (DeallocationTid) + R->error_type = USE_AFTER_FREE; + else if (FaultAddr < EntryPtr) + R->error_type = BUFFER_UNDERFLOW; + else + R->error_type = BUFFER_OVERFLOW; + + R->allocation_address = UntaggedEntryPtr; + R->allocation_size = EntrySize; + collectTraceMaybe(Depot, R->allocation_trace, AllocationTrace); + R->allocation_tid = AllocationTid; + collectTraceMaybe(Depot, R->deallocation_trace, DeallocationTrace); + R->deallocation_tid = DeallocationTid; + } } uptr getStats(ScopedString *Str) { diff --git a/compiler-rt/lib/scudo/standalone/common.cpp b/compiler-rt/lib/scudo/standalone/common.cpp index d93bfc59b3ca..666f95400c7e 100644 --- a/compiler-rt/lib/scudo/standalone/common.cpp +++ b/compiler-rt/lib/scudo/standalone/common.cpp @@ -8,6 +8,7 @@ #include "common.h" #include "atomic_helpers.h" +#include "string_utils.h" namespace scudo { @@ -21,11 +22,16 @@ uptr getPageSizeSlow() { } // Fatal internal map() or unmap() error (potentially OOM related). -void NORETURN dieOnMapUnmapError(bool OutOfMemory) { - outputRaw("Scudo ERROR: internal map or unmap failure"); - if (OutOfMemory) - outputRaw(" (OOM)"); - outputRaw("\n"); +void NORETURN dieOnMapUnmapError(uptr SizeIfOOM) { + char Error[128] = "Scudo ERROR: internal map or unmap failure\n"; + if (SizeIfOOM) { + formatString( + Error, sizeof(Error), + "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", + SizeIfOOM >> 10); + } + outputRaw(Error); + setAbortMessage(Error); die(); } diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index 662b733050bb..bc3dfec6dbba 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -13,6 +13,7 @@ #include "fuchsia.h" #include "linux.h" +#include "trusty.h" #include <stddef.h> #include <string.h> @@ -165,11 +166,15 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags = 0, void unmap(void *Addr, uptr Size, uptr Flags = 0, MapPlatformData *Data = nullptr); +void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, + MapPlatformData *Data = nullptr); + void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data = nullptr); -// Internal map & unmap fatal error. This must not call map(). -void NORETURN dieOnMapUnmapError(bool OutOfMemory = false); +// Internal map & unmap fatal error. This must not call map(). SizeIfOOM shall +// hold the requested size on an out-of-memory error, 0 otherwise. +void NORETURN dieOnMapUnmapError(uptr SizeIfOOM = 0); // Logging related functions. 
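To illustrate the revised dieOnMapUnmapError() contract, here is a hedged sketch mirroring the pattern the patch applies in linux.cpp (tryMap is a hypothetical caller, and scudo's common.h is assumed to be in scope for uptr and dieOnMapUnmapError):

#include <errno.h>
#include <sys/mman.h>

static void *tryMap(scudo::uptr Size) {
  void *P = mmap(nullptr, Size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (P == MAP_FAILED) {
    // Forward the requested size only for out-of-memory failures, so the
    // fatal message (and the abort message) can report the amount requested.
    scudo::dieOnMapUnmapError(errno == ENOMEM ? Size : 0);
  }
  return P;
}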
diff --git a/compiler-rt/lib/scudo/standalone/flags.cpp b/compiler-rt/lib/scudo/standalone/flags.cpp index 285143a5d6bb..de5153b288b1 100644 --- a/compiler-rt/lib/scudo/standalone/flags.cpp +++ b/compiler-rt/lib/scudo/standalone/flags.cpp @@ -23,6 +23,13 @@ void Flags::setDefaults() { #define SCUDO_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; #include "flags.inc" #undef SCUDO_FLAG + +#ifdef GWP_ASAN_HOOKS +#define GWP_ASAN_OPTION(Type, Name, DefaultValue, Description) \ + GWP_ASAN_##Name = DefaultValue; +#include "gwp_asan/options.inc" +#undef GWP_ASAN_OPTION +#endif // GWP_ASAN_HOOKS } void registerFlags(FlagParser *Parser, Flags *F) { @@ -31,6 +38,14 @@ void registerFlags(FlagParser *Parser, Flags *F) { reinterpret_cast<void *>(&F->Name)); #include "flags.inc" #undef SCUDO_FLAG + +#ifdef GWP_ASAN_HOOKS +#define GWP_ASAN_OPTION(Type, Name, DefaultValue, Description) \ + Parser->registerFlag("GWP_ASAN_" #Name, Description, FlagType::FT_##Type, \ + reinterpret_cast<void *>(&F->GWP_ASAN_##Name)); +#include "gwp_asan/options.inc" +#undef GWP_ASAN_OPTION +#endif // GWP_ASAN_HOOKS } static const char *getCompileDefinitionScudoDefaultOptions() { diff --git a/compiler-rt/lib/scudo/standalone/flags.inc b/compiler-rt/lib/scudo/standalone/flags.inc index b5cab4734166..690d889b8cee 100644 --- a/compiler-rt/lib/scudo/standalone/flags.inc +++ b/compiler-rt/lib/scudo/standalone/flags.inc @@ -37,12 +37,6 @@ SCUDO_FLAG(bool, zero_contents, false, "Zero chunk contents on allocation.") SCUDO_FLAG(bool, pattern_fill_contents, false, "Pattern fill chunk contents on allocation.") -SCUDO_FLAG(int, rss_limit_mb, -1, - "Enforce an upper limit (in megabytes) to the process RSS. The " - "allocator will terminate or return NULL when allocations are " - "attempted past that limit (depending on may_return_null). Negative " - "values disable the feature.") - SCUDO_FLAG(bool, may_return_null, true, "Indicate whether the allocator should terminate instead of " "returning NULL in otherwise non-fatal error scenarios, eg: OOM, " diff --git a/compiler-rt/lib/scudo/standalone/flags_parser.h b/compiler-rt/lib/scudo/standalone/flags_parser.h index 32511f768c66..ba832adbd909 100644 --- a/compiler-rt/lib/scudo/standalone/flags_parser.h +++ b/compiler-rt/lib/scudo/standalone/flags_parser.h @@ -29,7 +29,7 @@ public: void printFlagDescriptions(); private: - static const u32 MaxFlags = 16; + static const u32 MaxFlags = 20; struct Flag { const char *Name; const char *Desc; diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp index d4ea33277941..3b473bc9e22d 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -15,7 +15,6 @@ #include "string_utils.h" #include <lib/sync/mutex.h> // for sync_mutex_t -#include <limits.h> // for PAGE_SIZE #include <stdlib.h> // for getenv() #include <zircon/compiler.h> #include <zircon/sanitizer.h> @@ -23,7 +22,7 @@ namespace scudo { -uptr getPageSize() { return PAGE_SIZE; } +uptr getPageSize() { return _zx_system_get_page_size(); } void NORETURN die() { __builtin_trap(); } @@ -42,7 +41,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { Size, &Data->Vmar, &Data->VmarBase); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? 
Size : 0); return nullptr; } return reinterpret_cast<void *>(Data->VmarBase); @@ -50,7 +49,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { void *map(void *Addr, uptr Size, const char *Name, uptr Flags, MapPlatformData *Data) { - DCHECK_EQ(Size % PAGE_SIZE, 0); + DCHECK_EQ(Size % getPageSizeCached(), 0); const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); // For MAP_NOACCESS, just allocate a Vmar and return. @@ -72,7 +71,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_set_size(Vmo, VmoSize + Size); if (Status != ZX_OK) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); return nullptr; } } else { @@ -80,7 +79,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_create(Size, ZX_VMO_RESIZABLE, &Vmo); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); return nullptr; } _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, strlen(Name)); @@ -97,14 +96,16 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, // No need to track the Vmo if we don't intend on resizing it. Close it. if (Flags & MAP_RESIZABLE) { DCHECK(Data); - DCHECK_EQ(Data->Vmo, ZX_HANDLE_INVALID); - Data->Vmo = Vmo; + if (Data->Vmo == ZX_HANDLE_INVALID) + Data->Vmo = Vmo; + else + DCHECK_EQ(Data->Vmo, Vmo); } else { CHECK_EQ(_zx_handle_close(Vmo), ZX_OK); } if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY); + dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); return nullptr; } if (Data) @@ -135,6 +136,16 @@ void unmap(void *Addr, uptr Size, uptr Flags, MapPlatformData *Data) { } } +void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, + UNUSED MapPlatformData *Data) { + const zx_vm_option_t Prot = + (Flags & MAP_NOACCESS) ? 
0 : (ZX_VM_PERM_READ | ZX_VM_PERM_WRITE); + DCHECK(Data); + DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); + if (_zx_vmar_protect(Data->Vmar, Prot, Addr, Size) != ZX_OK) + dieOnMapUnmapError(); +} + void releasePagesToOS(UNUSED uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data) { DCHECK(Data); diff --git a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp index f20a8a84a010..078e44b0dfc8 100644 --- a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp +++ b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp @@ -37,16 +37,24 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { StackDepot[i] = StackDepotBytes[i]; } - std::string RegionInfoBytes = FDP.ConsumeRemainingBytesAsString(); + std::string RegionInfoBytes = + FDP.ConsumeRandomLengthString(FDP.remaining_bytes()); std::vector<char> RegionInfo(AllocatorT::getRegionInfoArraySize(), 0); for (size_t i = 0; i < RegionInfoBytes.length() && i < RegionInfo.size(); ++i) { RegionInfo[i] = RegionInfoBytes[i]; } + std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString(); + std::vector<char> RingBuffer(AllocatorT::getRingBufferSize(), 0); + for (size_t i = 0; i < RingBufferBytes.length() && i < RingBuffer.size(); + ++i) { + RingBuffer[i] = RingBufferBytes[i]; + } + scudo_error_info ErrorInfo; AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), - RegionInfo.data(), Memory, MemoryTags, MemoryAddr, - MemorySize); + RegionInfo.data(), RingBuffer.data(), Memory, + MemoryTags, MemoryAddr, MemorySize); return 0; } diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h index 68029e4857a3..9b9a84623c51 100644 --- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h +++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h @@ -73,9 +73,9 @@ typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); // pointer. void __scudo_get_error_info(struct scudo_error_info *error_info, uintptr_t fault_addr, const char *stack_depot, - const char *region_info, const char *memory, - const char *memory_tags, uintptr_t memory_addr, - size_t memory_size); + const char *region_info, const char *ring_buffer, + const char *memory, const char *memory_tags, + uintptr_t memory_addr, size_t memory_size); enum scudo_error_type { UNKNOWN, @@ -107,6 +107,9 @@ size_t __scudo_get_stack_depot_size(); const char *__scudo_get_region_info_addr(); size_t __scudo_get_region_info_size(); +const char *__scudo_get_ring_buffer_addr(); +size_t __scudo_get_ring_buffer_size(); + #ifndef M_DECAY_TIME #define M_DECAY_TIME -100 #endif @@ -117,7 +120,7 @@ size_t __scudo_get_region_info_size(); // Tune the allocator's choice of memory tags to make it more likely that // a certain class of memory errors will be detected. The value argument should -// be one of the enumerators of the scudo_memtag_tuning enum below. +// be one of the M_MEMTAG_TUNING_* constants below. #ifndef M_MEMTAG_TUNING #define M_MEMTAG_TUNING -102 #endif @@ -142,13 +145,15 @@ size_t __scudo_get_region_info_size(); #define M_TSDS_COUNT_MAX -202 #endif -enum scudo_memtag_tuning { - // Tune for buffer overflows. - M_MEMTAG_TUNING_BUFFER_OVERFLOW, +// Tune for buffer overflows. +#ifndef M_MEMTAG_TUNING_BUFFER_OVERFLOW +#define M_MEMTAG_TUNING_BUFFER_OVERFLOW 0 +#endif - // Tune for use-after-free. - M_MEMTAG_TUNING_UAF, -}; +// Tune for use-after-free. 
+#ifndef M_MEMTAG_TUNING_UAF
+#define M_MEMTAG_TUNING_UAF 1
+#endif

} // extern "C"

diff --git a/compiler-rt/lib/scudo/standalone/internal_defs.h b/compiler-rt/lib/scudo/standalone/internal_defs.h
index 0babbbe3c11b..c9ffad136b78 100644
--- a/compiler-rt/lib/scudo/standalone/internal_defs.h
+++ b/compiler-rt/lib/scudo/standalone/internal_defs.h
@@ -48,6 +48,34 @@
 #define USED __attribute__((used))
 #define NOEXCEPT noexcept

+// This check is only available on Clang. This is essentially an alias of
+// C++20's 'constinit' specifier, which will take care of this when (if?) we
+// can ask all libcs that use Scudo to compile us with C++20. Dynamic
+// initialization is bad; Scudo is designed to be lazily initialized on the
+// first call to malloc/free (and friends), and this generally happens in the
+// loader somewhere in libdl's init. After the loader is done, control is
+// transferred to libc's initialization, and the dynamic initializers are run.
+// If there's a dynamic initializer for Scudo, then it will clobber the
+// already-initialized Scudo, and re-initialize all its members back to default
+// values, causing various explosions. Unfortunately, marking
+// scudo::Allocator<>'s constructor as 'constexpr' isn't sufficient to prevent
+// dynamic initialization, as default initialization is fine under 'constexpr'
+// (but not 'constinit'). Clang at -O0, and gcc at all opt levels, will emit a
+// dynamic initializer for any constant-initialized variables if there is a mix
+// of default-initialized and constant-initialized variables.
+//
+// If you're looking at this because your build failed, you probably introduced
+// a new member to scudo::Allocator<> (possibly transiently) that didn't have
+// an initializer. The fix is easy - just add one.
+#if defined(__has_attribute)
+#if __has_attribute(require_constant_initialization)
+#define SCUDO_REQUIRE_CONSTANT_INITIALIZATION \
+  __attribute__((__require_constant_initialization__))
+#else
+#define SCUDO_REQUIRE_CONSTANT_INITIALIZATION
+#endif
+#endif
+
 namespace scudo {

 typedef unsigned long uptr;
@@ -77,14 +105,11 @@
 void NORETURN die();

 void NORETURN reportCheckFailed(const char *File, int Line,
                                 const char *Condition, u64 Value1, u64 Value2);
-
 #define CHECK_IMPL(C1, Op, C2) \
   do { \
-    scudo::u64 V1 = (scudo::u64)(C1); \
-    scudo::u64 V2 = (scudo::u64)(C2); \
-    if (UNLIKELY(!(V1 Op V2))) { \
-      scudo::reportCheckFailed(__FILE__, __LINE__, \
-                               "(" #C1 ") " #Op " (" #C2 ")", V1, V2); \
+    if (UNLIKELY(!(C1 Op C2))) { \
+      scudo::reportCheckFailed(__FILE__, __LINE__, #C1 " " #Op " " #C2, \
+                               (scudo::u64)C1, (scudo::u64)C2); \
       scudo::die(); \
     } \
   } while (false)
@@ -106,13 +131,27 @@ void NORETURN reportCheckFailed(const char *File, int Line,
 #define DCHECK_GT(A, B) CHECK_GT(A, B)
 #define DCHECK_GE(A, B) CHECK_GE(A, B)
 #else
-#define DCHECK(A)
-#define DCHECK_EQ(A, B)
-#define DCHECK_NE(A, B)
-#define DCHECK_LT(A, B)
-#define DCHECK_LE(A, B)
-#define DCHECK_GT(A, B)
-#define DCHECK_GE(A, B)
+#define DCHECK(A) \
+  do { \
+  } while (false)
+#define DCHECK_EQ(A, B) \
+  do { \
+  } while (false)
+#define DCHECK_NE(A, B) \
+  do { \
+  } while (false)
+#define DCHECK_LT(A, B) \
+  do { \
+  } while (false)
+#define DCHECK_LE(A, B) \
+  do { \
+  } while (false)
+#define DCHECK_GT(A, B) \
+  do { \
+  } while (false)
+#define DCHECK_GE(A, B) \
+  do { \
+  } while (false)
 #endif

 // The superfluous die() call effectively makes this macro NORETURN.
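To make the hazard described above concrete, here is a minimal sketch of how the new macro is meant to be used (illustrative only, not part of the patch; `Example` and `GlobalExample` are hypothetical stand-ins for scudo::Allocator<> and its global instance):

// Constant-initializable: the constructor is constexpr and every member
// carries an initializer.
struct Example {
  constexpr Example() {}
  int Member = 0;
};
// If Member ever loses its initializer (or the constructor stops being
// constexpr), Clang rejects this declaration outright instead of silently
// emitting a dynamic initializer.
SCUDO_REQUIRE_CONSTANT_INITIALIZATION static Example GlobalExample;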
diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp index d2464677b279..c77c1bb600d9 100644 --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -50,27 +50,24 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, MmapProt = PROT_NONE; } else { MmapProt = PROT_READ | PROT_WRITE; + } #if defined(__aarch64__) #ifndef PROT_MTE #define PROT_MTE 0x20 #endif - if (Flags & MAP_MEMTAG) - MmapProt |= PROT_MTE; + if (Flags & MAP_MEMTAG) + MmapProt |= PROT_MTE; #endif - } - if (Addr) { - // Currently no scenario for a noaccess mapping with a fixed address. - DCHECK_EQ(Flags & MAP_NOACCESS, 0); + if (Addr) MmapFlags |= MAP_FIXED; - } void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(errno == ENOMEM); + dieOnMapUnmapError(errno == ENOMEM ? Size : 0); return nullptr; } #if SCUDO_ANDROID - if (!(Flags & MAP_NOACCESS)) + if (Name) prctl(ANDROID_PR_SET_VMA, ANDROID_PR_SET_VMA_ANON_NAME, P, Size, Name); #endif return P; @@ -82,9 +79,17 @@ void unmap(void *Addr, uptr Size, UNUSED uptr Flags, dieOnMapUnmapError(); } +void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, + UNUSED MapPlatformData *Data) { + int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); + if (mprotect(reinterpret_cast<void *>(Addr), Size, Prot) != 0) + dieOnMapUnmapError(); +} + void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, UNUSED MapPlatformData *Data) { void *Addr = reinterpret_cast<void *>(BaseAddress + Offset); + while (madvise(Addr, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) { } } diff --git a/compiler-rt/lib/scudo/standalone/list.h b/compiler-rt/lib/scudo/standalone/list.h index c3b898a328ca..1ac93c2f65d7 100644 --- a/compiler-rt/lib/scudo/standalone/list.h +++ b/compiler-rt/lib/scudo/standalone/list.h @@ -57,9 +57,9 @@ template <class T> struct IntrusiveList { void checkConsistency() const; protected: - uptr Size; - T *First; - T *Last; + uptr Size = 0; + T *First = nullptr; + T *Last = nullptr; }; template <class T> void IntrusiveList<T>::checkConsistency() const { diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h index 089aeb939627..f46645f9badf 100644 --- a/compiler-rt/lib/scudo/standalone/local_cache.h +++ b/compiler-rt/lib/scudo/standalone/local_cache.h @@ -17,24 +17,25 @@ namespace scudo { template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { typedef typename SizeClassAllocator::SizeClassMap SizeClassMap; + typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; struct TransferBatch { static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint; - void setFromArray(void **Array, u32 N) { + void setFromArray(CompactPtrT *Array, u32 N) { DCHECK_LE(N, MaxNumCached); Count = N; - memcpy(Batch, Array, sizeof(void *) * Count); + memcpy(Batch, Array, sizeof(Batch[0]) * Count); } void clear() { Count = 0; } - void add(void *P) { + void add(CompactPtrT P) { DCHECK_LT(Count, MaxNumCached); Batch[Count++] = P; } - void copyToArray(void **Array) const { - memcpy(Array, Batch, sizeof(void *) * Count); + void copyToArray(CompactPtrT *Array) const { + memcpy(Array, Batch, sizeof(Batch[0]) * Count); } u32 getCount() const { return Count; } - void *get(u32 I) const { + CompactPtrT get(u32 I) const { DCHECK_LE(I, Count); return Batch[I]; } @@ -45,21 +46,17 @@ template <class SizeClassAllocator> 
struct SizeClassAllocatorLocalCache { private: u32 Count; - void *Batch[MaxNumCached]; + CompactPtrT Batch[MaxNumCached]; }; - void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) { - Stats.initLinkerInitialized(); + void init(GlobalStats *S, SizeClassAllocator *A) { + DCHECK(isEmpty()); + Stats.init(); if (LIKELY(S)) S->link(&Stats); Allocator = A; } - void init(GlobalStats *S, SizeClassAllocator *A) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(S, A); - } - void destroy(GlobalStats *S) { drain(); if (LIKELY(S)) @@ -78,13 +75,10 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { // Count, while Chunks might be further off (depending on Count). That keeps // the memory accesses in close quarters. const uptr ClassSize = C->ClassSize; - void *P = C->Chunks[--C->Count]; - // The jury is still out as to whether any kind of PREFETCH here increases - // performance. It definitely decreases performance on Android though. - // if (!SCUDO_ANDROID) PREFETCH(P); + CompactPtrT CompactP = C->Chunks[--C->Count]; Stats.add(StatAllocated, ClassSize); Stats.sub(StatFree, ClassSize); - return P; + return Allocator->decompactPtr(ClassId, CompactP); } void deallocate(uptr ClassId, void *P) { @@ -97,22 +91,35 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { drain(C, ClassId); // See comment in allocate() about memory accesses. const uptr ClassSize = C->ClassSize; - C->Chunks[C->Count++] = P; + C->Chunks[C->Count++] = + Allocator->compactPtr(ClassId, reinterpret_cast<uptr>(P)); Stats.sub(StatAllocated, ClassSize); Stats.add(StatFree, ClassSize); } + bool isEmpty() const { + for (uptr I = 0; I < NumClasses; ++I) + if (PerClassArray[I].Count) + return false; + return true; + } + void drain() { - for (uptr I = 0; I < NumClasses; I++) { - PerClass *C = &PerClassArray[I]; - while (C->Count > 0) - drain(C, I); + // Drain BatchClassId last as createBatch can refill it. + for (uptr I = 0; I < NumClasses; ++I) { + if (I == BatchClassId) + continue; + while (PerClassArray[I].Count > 0) + drain(&PerClassArray[I], I); } + while (PerClassArray[BatchClassId].Count > 0) + drain(&PerClassArray[BatchClassId], BatchClassId); + DCHECK(isEmpty()); } TransferBatch *createBatch(uptr ClassId, void *B) { - if (ClassId != SizeClassMap::BatchClassId) - B = allocate(SizeClassMap::BatchClassId); + if (ClassId != BatchClassId) + B = allocate(BatchClassId); return reinterpret_cast<TransferBatch *>(B); } @@ -120,15 +127,17 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache { private: static const uptr NumClasses = SizeClassMap::NumClasses; + static const uptr BatchClassId = SizeClassMap::BatchClassId; struct PerClass { u32 Count; u32 MaxCount; + // Note: ClassSize is zero for the transfer batch. 
uptr ClassSize; - void *Chunks[2 * TransferBatch::MaxNumCached]; + CompactPtrT Chunks[2 * TransferBatch::MaxNumCached]; }; - PerClass PerClassArray[NumClasses]; + PerClass PerClassArray[NumClasses] = {}; LocalStats Stats; - SizeClassAllocator *Allocator; + SizeClassAllocator *Allocator = nullptr; ALWAYS_INLINE void initCacheMaybe(PerClass *C) { if (LIKELY(C->MaxCount)) @@ -142,13 +151,19 @@ private: PerClass *P = &PerClassArray[I]; const uptr Size = SizeClassAllocator::getSizeByClassId(I); P->MaxCount = 2 * TransferBatch::getMaxCached(Size); - P->ClassSize = Size; + if (I != BatchClassId) { + P->ClassSize = Size; + } else { + // ClassSize in this struct is only used for malloc/free stats, which + // should only track user allocations, not internal movements. + P->ClassSize = 0; + } } } void destroyBatch(uptr ClassId, void *B) { - if (ClassId != SizeClassMap::BatchClassId) - deallocate(SizeClassMap::BatchClassId, B); + if (ClassId != BatchClassId) + deallocate(BatchClassId, B); } NOINLINE bool refill(PerClass *C, uptr ClassId) { @@ -166,10 +181,10 @@ private: NOINLINE void drain(PerClass *C, uptr ClassId) { const u32 Count = Min(C->MaxCount / 2, C->Count); - TransferBatch *B = createBatch(ClassId, C->Chunks[0]); + TransferBatch *B = + createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0])); if (UNLIKELY(!B)) - reportOutOfMemory( - SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId)); + reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); B->setFromArray(&C->Chunks[0], Count); C->Count -= Count; for (uptr I = 0; I < C->Count; I++) diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index b1b62065ed72..c48e228fbe44 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -18,12 +18,17 @@ namespace scudo { -void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, uptr *TaggedBegin, - uptr *TaggedEnd); - -#if defined(__aarch64__) || defined(SCUDO_FUZZ) +#if (__clang_major__ >= 12 && defined(__aarch64__)) || defined(SCUDO_FUZZ) +// We assume that Top-Byte Ignore is enabled if the architecture supports memory +// tagging. Not all operating systems enable TBI, so we only claim architectural +// support for memory tagging if the operating system enables TBI. 
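+// (Top-Byte Ignore makes the core disregard bits 63:56 of an address on
+// loads and stores, which is what lets a tagged pointer be dereferenced
+// directly instead of having to be untagged first.)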
+#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) inline constexpr bool archSupportsMemoryTagging() { return true; } +#else +inline constexpr bool archSupportsMemoryTagging() { return false; } +#endif + inline constexpr uptr archMemoryTagGranuleSize() { return 16; } inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); } @@ -50,7 +55,7 @@ inline uint8_t extractTag(uptr Ptr) { #endif -#if defined(__aarch64__) +#if __clang_major__ >= 12 && defined(__aarch64__) #if SCUDO_LINUX @@ -62,15 +67,27 @@ inline bool systemSupportsMemoryTagging() { } inline bool systemDetectsMemoryTagFaultsTestOnly() { +#ifndef PR_SET_TAGGED_ADDR_CTRL +#define PR_SET_TAGGED_ADDR_CTRL 54 +#endif #ifndef PR_GET_TAGGED_ADDR_CTRL #define PR_GET_TAGGED_ADDR_CTRL 56 #endif +#ifndef PR_TAGGED_ADDR_ENABLE +#define PR_TAGGED_ADDR_ENABLE (1UL << 0) +#endif #ifndef PR_MTE_TCF_SHIFT #define PR_MTE_TCF_SHIFT 1 #endif +#ifndef PR_MTE_TAG_SHIFT +#define PR_MTE_TAG_SHIFT 3 +#endif #ifndef PR_MTE_TCF_NONE #define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) #endif +#ifndef PR_MTE_TCF_SYNC +#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +#endif #ifndef PR_MTE_TCF_MASK #define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) #endif @@ -79,139 +96,157 @@ inline bool systemDetectsMemoryTagFaultsTestOnly() { PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE; } +inline void enableSystemMemoryTaggingTestOnly() { + prctl(PR_SET_TAGGED_ADDR_CTRL, + PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | (0xfffe << PR_MTE_TAG_SHIFT), + 0, 0, 0); +} + #else // !SCUDO_LINUX inline bool systemSupportsMemoryTagging() { return false; } -inline bool systemDetectsMemoryTagFaultsTestOnly() { return false; } - -#endif // SCUDO_LINUX - -inline void disableMemoryTagChecksTestOnly() { - __asm__ __volatile__(".arch_extension mte; msr tco, #1"); +inline bool systemDetectsMemoryTagFaultsTestOnly() { + UNREACHABLE("memory tagging not supported"); } -inline void enableMemoryTagChecksTestOnly() { - __asm__ __volatile__(".arch_extension mte; msr tco, #0"); +inline void enableSystemMemoryTaggingTestOnly() { + UNREACHABLE("memory tagging not supported"); } +#endif // SCUDO_LINUX + class ScopedDisableMemoryTagChecks { - size_t PrevTCO; + uptr PrevTCO; public: ScopedDisableMemoryTagChecks() { - __asm__ __volatile__(".arch_extension mte; mrs %0, tco; msr tco, #1" - : "=r"(PrevTCO)); + __asm__ __volatile__( + R"( + .arch_extension memtag + mrs %0, tco + msr tco, #1 + )" + : "=r"(PrevTCO)); } ~ScopedDisableMemoryTagChecks() { - __asm__ __volatile__(".arch_extension mte; msr tco, %0" : : "r"(PrevTCO)); + __asm__ __volatile__( + R"( + .arch_extension memtag + msr tco, %0 + )" + : + : "r"(PrevTCO)); } }; inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { + ExcludeMask |= 1; // Always exclude Tag 0. uptr TaggedPtr; __asm__ __volatile__( - ".arch_extension mte; irg %[TaggedPtr], %[Ptr], %[ExcludeMask]" + R"( + .arch_extension memtag + irg %[TaggedPtr], %[Ptr], %[ExcludeMask] + )" : [TaggedPtr] "=r"(TaggedPtr) : [Ptr] "r"(Ptr), [ExcludeMask] "r"(ExcludeMask)); return TaggedPtr; } -inline uptr storeTags(uptr Begin, uptr End) { - DCHECK(Begin % 16 == 0); - if (Begin != End) { - __asm__ __volatile__( - R"( - .arch_extension mte - - 1: - stzg %[Cur], [%[Cur]], #16 - cmp %[Cur], %[End] - b.lt 1b - )" - : [Cur] "+&r"(Begin) - : [End] "r"(End) - : "memory"); - } - return Begin; -} - -inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr ExcludeMask, - uptr BlockEnd) { - // Prepare the granule before the chunk to store the chunk header by setting - // its tag to 0. 
Normally its tag will already be 0, but in the case where a - // chunk holding a low alignment allocation is reused for a higher alignment - // allocation, the chunk may already have a non-zero tag from the previous - // allocation. - __asm__ __volatile__(".arch_extension mte; stg %0, [%0, #-16]" - : - : "r"(Ptr) - : "memory"); - - uptr TaggedBegin, TaggedEnd; - setRandomTag(Ptr, Size, ExcludeMask, &TaggedBegin, &TaggedEnd); - - // Finally, set the tag of the granule past the end of the allocation to 0, - // to catch linear overflows even if a previous larger allocation used the - // same block and tag. Only do this if the granule past the end is in our - // block, because this would otherwise lead to a SEGV if the allocation - // covers the entire block and our block is at the end of a mapping. The tag - // of the next block's header granule will be set to 0, so it will serve the - // purpose of catching linear overflows in this case. - uptr UntaggedEnd = untagPointer(TaggedEnd); - if (UntaggedEnd != BlockEnd) - __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" - : - : "r"(UntaggedEnd) - : "memory"); - return reinterpret_cast<void *>(TaggedBegin); +inline uptr addFixedTag(uptr Ptr, uptr Tag) { + DCHECK_LT(Tag, 16); + DCHECK_EQ(untagPointer(Ptr), Ptr); + return Ptr | (Tag << 56); } -inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { - uptr RoundOldPtr = roundUpTo(OldPtr, 16); - if (RoundOldPtr >= NewPtr) { - // If the allocation is shrinking we just need to set the tag past the end - // of the allocation to 0. See explanation in prepareTaggedChunk above. - uptr RoundNewPtr = untagPointer(roundUpTo(NewPtr, 16)); - if (RoundNewPtr != BlockEnd) - __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" - : - : "r"(RoundNewPtr) - : "memory"); - return; - } - - __asm__ __volatile__(R"( - .arch_extension mte - - // Set the memory tag of the region - // [roundUpTo(OldPtr, 16), roundUpTo(NewPtr, 16)) - // to the pointer tag stored in OldPtr. +inline uptr storeTags(uptr Begin, uptr End) { + DCHECK_EQ(0, Begin % 16); + uptr LineSize, Next, Tmp; + __asm__ __volatile__( + R"( + .arch_extension memtag + + // Compute the cache line size in bytes (DCZID_EL0 stores it as the log2 + // of the number of 4-byte words) and bail out to the slow path if DCZID_EL0 + // indicates that the DC instructions are unavailable. + DCZID .req %[Tmp] + mrs DCZID, dczid_el0 + tbnz DCZID, #4, 3f + and DCZID, DCZID, #15 + mov %[LineSize], #4 + lsl %[LineSize], %[LineSize], DCZID + .unreq DCZID + + // Our main loop doesn't handle the case where we don't need to perform any + // DC GZVA operations. If the size of our tagged region is less than + // twice the cache line size, bail out to the slow path since it's not + // guaranteed that we'll be able to do a DC GZVA. + Size .req %[Tmp] + sub Size, %[End], %[Cur] + cmp Size, %[LineSize], lsl #1 + b.lt 3f + .unreq Size + + LineMask .req %[Tmp] + sub LineMask, %[LineSize], #1 + + // STZG until the start of the next cache line. + orr %[Next], %[Cur], LineMask 1: stzg %[Cur], [%[Cur]], #16 - cmp %[Cur], %[End] + cmp %[Cur], %[Next] b.lt 1b - // Finally, set the tag of the granule past the end of the allocation to 0. - and %[Cur], %[Cur], #(1 << 56) - 1 - cmp %[Cur], %[BlockEnd] - b.eq 2f - stg %[Cur], [%[Cur]] - + // DC GZVA cache lines until we have no more full cache lines. 
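+    // (DC GZVA zeroes a whole naturally-aligned cache line and sets its
+    // allocation tags in a single operation, which is why it is preferred
+    // over a run of 16-byte STZG stores once the cursor is line-aligned.)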
+ bic %[Next], %[End], LineMask + .unreq LineMask 2: + dc gzva, %[Cur] + add %[Cur], %[Cur], %[LineSize] + cmp %[Cur], %[Next] + b.lt 2b + + // STZG until the end of the tagged region. This loop is also used to handle + // slow path cases. + 3: + cmp %[Cur], %[End] + b.ge 4f + stzg %[Cur], [%[Cur]], #16 + b 3b + + 4: )" - : [Cur] "+&r"(RoundOldPtr), [End] "+&r"(NewPtr) - : [BlockEnd] "r"(BlockEnd) + : [Cur] "+&r"(Begin), [LineSize] "=&r"(LineSize), [Next] "=&r"(Next), + [Tmp] "=&r"(Tmp) + : [End] "r"(End) + : "memory"); + DCHECK_EQ(0, Begin % 16); + return Begin; +} + +inline void storeTag(uptr Ptr) { + DCHECK_EQ(0, Ptr % 16); + __asm__ __volatile__(R"( + .arch_extension memtag + stg %0, [%0] + )" + : + : "r"(Ptr) : "memory"); } inline uptr loadTag(uptr Ptr) { + DCHECK_EQ(0, Ptr % 16); uptr TaggedPtr = Ptr; - __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" - : "+r"(TaggedPtr) - : - : "memory"); + __asm__ __volatile__( + R"( + .arch_extension memtag + ldg %0, [%0] + )" + : "+r"(TaggedPtr) + : + : "memory"); return TaggedPtr; } @@ -225,11 +260,7 @@ inline bool systemDetectsMemoryTagFaultsTestOnly() { UNREACHABLE("memory tagging not supported"); } -inline void disableMemoryTagChecksTestOnly() { - UNREACHABLE("memory tagging not supported"); -} - -inline void enableMemoryTagChecksTestOnly() { +inline void enableSystemMemoryTaggingTestOnly() { UNREACHABLE("memory tagging not supported"); } @@ -243,25 +274,20 @@ inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { UNREACHABLE("memory tagging not supported"); } +inline uptr addFixedTag(uptr Ptr, uptr Tag) { + (void)Ptr; + (void)Tag; + UNREACHABLE("memory tagging not supported"); +} + inline uptr storeTags(uptr Begin, uptr End) { (void)Begin; (void)End; UNREACHABLE("memory tagging not supported"); } -inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr ExcludeMask, - uptr BlockEnd) { +inline void storeTag(uptr Ptr) { (void)Ptr; - (void)Size; - (void)ExcludeMask; - (void)BlockEnd; - UNREACHABLE("memory tagging not supported"); -} - -inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) { - (void)OldPtr; - (void)NewPtr; - (void)BlockEnd; UNREACHABLE("memory tagging not supported"); } @@ -278,9 +304,23 @@ inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, *TaggedEnd = storeTags(*TaggedBegin, *TaggedBegin + Size); } +inline void *untagPointer(void *Ptr) { + return reinterpret_cast<void *>(untagPointer(reinterpret_cast<uptr>(Ptr))); +} + +inline void *loadTag(void *Ptr) { + return reinterpret_cast<void *>(loadTag(reinterpret_cast<uptr>(Ptr))); +} + +inline void *addFixedTag(void *Ptr, uptr Tag) { + return reinterpret_cast<void *>( + addFixedTag(reinterpret_cast<uptr>(Ptr), Tag)); +} + template <typename Config> inline constexpr bool allocatorSupportsMemoryTagging() { - return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging; + return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging && + (1 << SCUDO_MIN_ALIGNMENT_LOG) >= archMemoryTagGranuleSize(); } } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/mutex.h b/compiler-rt/lib/scudo/standalone/mutex.h index d6e6a5b33aae..c8504c040914 100644 --- a/compiler-rt/lib/scudo/standalone/mutex.h +++ b/compiler-rt/lib/scudo/standalone/mutex.h @@ -22,7 +22,6 @@ namespace scudo { class HybridMutex { public: - void init() { M = {}; } bool tryLock(); NOINLINE void lock() { if (LIKELY(tryLock())) @@ -48,9 +47,9 @@ private: static constexpr u8 NumberOfYields = 8U; #if SCUDO_LINUX - atomic_u32 M; + atomic_u32 M = {}; #elif 
SCUDO_FUCHSIA - sync_mutex_t M; + sync_mutex_t M = {}; #endif void lockSlow(); diff --git a/compiler-rt/lib/scudo/standalone/options.h b/compiler-rt/lib/scudo/standalone/options.h index 91301bf5ec9c..4e6786513334 100644 --- a/compiler-rt/lib/scudo/standalone/options.h +++ b/compiler-rt/lib/scudo/standalone/options.h @@ -24,6 +24,7 @@ enum class OptionBit { TrackAllocationStacks, UseOddEvenTags, UseMemoryTagging, + AddLargeAllocationSlack, }; struct Options { @@ -43,9 +44,8 @@ template <typename Config> bool useMemoryTagging(Options Options) { } struct AtomicOptions { - atomic_u32 Val; + atomic_u32 Val = {}; -public: Options load() const { return Options{atomic_load_relaxed(&Val)}; } void clear(OptionBit Opt) { diff --git a/compiler-rt/lib/scudo/standalone/platform.h b/compiler-rt/lib/scudo/standalone/platform.h index a4c2a0b26603..36378d14d844 100644 --- a/compiler-rt/lib/scudo/standalone/platform.h +++ b/compiler-rt/lib/scudo/standalone/platform.h @@ -12,7 +12,7 @@ // Transitive includes of stdint.h specify some of the defines checked below. #include <stdint.h> -#if defined(__linux__) +#if defined(__linux__) && !defined(__TRUSTY__) #define SCUDO_LINUX 1 #else #define SCUDO_LINUX 0 @@ -31,6 +31,12 @@ #define SCUDO_FUCHSIA 0 #endif +#if defined(__TRUSTY__) +#define SCUDO_TRUSTY 1 +#else +#define SCUDO_TRUSTY 0 +#endif + #if __LP64__ #define SCUDO_WORDSIZE 64U #else diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index a88a2a67e951..326c10a32a85 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -41,6 +41,7 @@ namespace scudo { template <typename Config> class SizeClassAllocator32 { public: + typedef typename Config::PrimaryCompactPtrT CompactPtrT; typedef typename Config::SizeClassMap SizeClassMap; // The bytemap can only track UINT8_MAX - 1 classes. 
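  // (PossibleRegions stores ClassId + 1 per region, with 0 meaning unused, so
  // the largest ClassId a u8 entry can represent is UINT8_MAX - 1.)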
static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); @@ -59,15 +60,18 @@ public: static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void initLinkerInitialized(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) { if (SCUDO_FUCHSIA) reportError("SizeClassAllocator32 is not supported on Fuchsia"); - PossibleRegions.initLinkerInitialized(); + if (SCUDO_TRUSTY) + reportError("SizeClassAllocator32 is not supported on Trusty"); + DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); + PossibleRegions.init(); u32 Seed; const u64 Time = getMonotonicTime(); - if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))) + if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>( Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6)); for (uptr I = 0; I < NumClasses; I++) { @@ -79,10 +83,6 @@ public: } setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void init(s32 ReleaseToOsInterval) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(ReleaseToOsInterval); - } void unmapTestOnly() { while (NumberOfStashedRegions > 0) @@ -95,6 +95,7 @@ public: MinRegionIndex = Sci->MinRegionIndex; if (Sci->MaxRegionIndex > MaxRegionIndex) MaxRegionIndex = Sci->MaxRegionIndex; + *Sci = {}; } for (uptr I = MinRegionIndex; I < MaxRegionIndex; I++) if (PossibleRegions[I]) @@ -102,6 +103,14 @@ public: PossibleRegions.unmapTestOnly(); } + CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const { + return static_cast<CompactPtrT>(Ptr); + } + + void *decompactPtr(UNUSED uptr ClassId, CompactPtrT CompactPtr) const { + return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr)); + } + TransferBatch *popBatch(CacheT *C, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); @@ -359,17 +368,18 @@ private: // Fill the transfer batches and put them in the size-class freelist. We // need to randomize the blocks for security purposes, so we first fill a // local array that we then shuffle before populating the batches. - void *ShuffleArray[ShuffleArraySize]; + CompactPtrT ShuffleArray[ShuffleArraySize]; DCHECK_LE(NumberOfBlocks, ShuffleArraySize); uptr P = Region + Offset; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) - ShuffleArray[I] = reinterpret_cast<void *>(P); + ShuffleArray[I] = reinterpret_cast<CompactPtrT>(P); // No need to shuffle the batches size class. if (ClassId != SizeClassMap::BatchClassId) shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState); for (u32 I = 0; I < NumberOfBlocks;) { - TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]); + TransferBatch *B = + C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I])); if (UNLIKELY(!B)) return nullptr; const u32 N = Min(MaxCount, NumberOfBlocks - I); @@ -435,7 +445,7 @@ private: if (BlockSize < PageSize / 16U) { if (!Force && BytesPushed < Sci->AllocatedUser / 16U) return 0; - // We want 8x% to 9x% free bytes (the larger the bock, the lower the %). + // We want 8x% to 9x% free bytes (the larger the block, the lower the %). 
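+      // e.g. with 4 KiB pages this branch covers blocks smaller than 256
+      // bytes: a 240-byte class only releases once (100 - 1 - 240/16) = 84%
+      // of its bytes are free, while a 32-byte class needs 97%.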
    if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
        (100U - 1U - BlockSize / 16U))
      return 0;
@@ -463,8 +473,11 @@ private:
    auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
      return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
    };
-    releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions,
-                          BlockSize, &Recorder, SkipRegion);
+    auto DecompactPtr = [](CompactPtrT CompactPtr) {
+      return reinterpret_cast<uptr>(CompactPtr);
+    };
+    releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize,
+                          &Recorder, DecompactPtr, SkipRegion);
    if (Recorder.getReleasedRangesCount() > 0) {
      Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
      Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
@@ -476,17 +489,17 @@ private:
    return TotalReleasedBytes;
  }

-  SizeClassInfo SizeClassInfoArray[NumClasses];
+  SizeClassInfo SizeClassInfoArray[NumClasses] = {};
  // Track the regions in use, 0 is unused, otherwise store ClassId + 1.
-  ByteMap PossibleRegions;
-  atomic_s32 ReleaseToOsIntervalMs;
+  ByteMap PossibleRegions = {};
+  atomic_s32 ReleaseToOsIntervalMs = {};
  // Unless several threads request regions simultaneously from different size
  // classes, the stash rarely contains more than 1 entry.
  static constexpr uptr MaxStashedRegions = 4;
  HybridMutex RegionsStashMutex;
-  uptr NumberOfStashedRegions;
-  uptr RegionsStash[MaxStashedRegions];
+  uptr NumberOfStashedRegions = 0;
+  uptr RegionsStash[MaxStashedRegions] = {};
};

} // namespace scudo

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 2724a2529f75..13420bf3d222 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -25,8 +25,9 @@ namespace scudo {
 //
 // It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided in
 // Regions, specific to each size class. Note that the base of that mapping is
-// random (based to the platform specific map() capabilities), and that each
-// Region actually starts at a random offset from its base.
+// random (based on the platform-specific map() capabilities). If
+// PrimaryEnableRandomOffset is set, each Region actually starts at a random
+// offset from its base.
 //
 // Regions are mapped incrementally on demand to fulfill allocation requests,
 // those mappings being split into equally sized Blocks based on the size class
@@ -42,6 +43,8 @@ namespace scudo {

template <typename Config> class SizeClassAllocator64 {
public:
+  typedef typename Config::PrimaryCompactPtrT CompactPtrT;
+  static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale;
  typedef typename Config::SizeClassMap SizeClassMap;
  typedef SizeClassAllocator64<Config> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
@@ -49,39 +52,45 @@ public:

  static uptr getSizeByClassId(uptr ClassId) {
    return (ClassId == SizeClassMap::BatchClassId)
-               ? sizeof(TransferBatch)
+               ? roundUpTo(sizeof(TransferBatch), 1U << CompactPtrScale)
               : SizeClassMap::getSizeByClassId(ClassId);
  }

  static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }

-  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+  void init(s32 ReleaseToOsInterval) {
+    DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
+    DCHECK_EQ(PrimaryBase, 0U);
    // Reserve the space required for the Primary.
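    // (That is NumClasses regions of 2^PrimaryRegionSizeLog bytes each,
    // reserved MAP_NOACCESS up front; populateFreeList() commits user memory
    // incrementally in MapSizeIncrement-sized chunks.)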
PrimaryBase = reinterpret_cast<uptr>( - map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data)); + map(nullptr, PrimarySize, nullptr, MAP_NOACCESS, &Data)); u32 Seed; const u64 Time = getMonotonicTime(); - if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))) + if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12)); const uptr PageSize = getPageSizeCached(); for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); - // The actual start of a region is offseted by a random number of pages. - Region->RegionBeg = - getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize; + // The actual start of a region is offset by a random number of pages + // when PrimaryEnableRandomOffset is set. + Region->RegionBeg = getRegionBaseByClassId(I) + + (Config::PrimaryEnableRandomOffset + ? ((getRandomModN(&Seed, 16) + 1) * PageSize) + : 0); Region->RandState = getRandomU32(&Seed); Region->ReleaseInfo.LastReleaseAtNs = Time; } setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void init(s32 ReleaseToOsInterval) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(ReleaseToOsInterval); - } void unmapTestOnly() { + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + *Region = {}; + } unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data); + PrimaryBase = 0U; } TransferBatch *popBatch(CacheT *C, uptr ClassId) { @@ -194,6 +203,24 @@ public: static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } + uptr getCompactPtrBaseByClassId(uptr ClassId) { + // If we are not compacting pointers, base everything off of 0. + if (sizeof(CompactPtrT) == sizeof(uptr) && CompactPtrScale == 0) + return 0; + return getRegionInfo(ClassId)->RegionBeg; + } + + CompactPtrT compactPtr(uptr ClassId, uptr Ptr) { + DCHECK_LE(ClassId, SizeClassMap::LargestClassId); + return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr); + } + + void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) { + DCHECK_LE(ClassId, SizeClassMap::LargestClassId); + return reinterpret_cast<void *>( + decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); + } + static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { const RegionInfo *RegionInfoArray = reinterpret_cast<const RegionInfo *>(RegionInfoData); @@ -245,8 +272,7 @@ private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr PrimarySize = RegionSize * NumClasses; - // Call map for user memory with at least this size. - static const uptr MapSizeIncrement = 1UL << 18; + static const uptr MapSizeIncrement = Config::PrimaryMapSizeIncrement; // Fill at most this number of batches from the newly map'd memory. static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; @@ -265,24 +291,24 @@ private: struct UnpaddedRegionInfo { HybridMutex Mutex; SinglyLinkedList<TransferBatch> FreeList; - RegionStats Stats; - bool Exhausted; - u32 RandState; - uptr RegionBeg; - uptr MappedUser; // Bytes mapped for user memory. - uptr AllocatedUser; // Bytes allocated for user memory. - MapPlatformData Data; - ReleaseToOsInfo ReleaseInfo; + uptr RegionBeg = 0; + RegionStats Stats = {}; + u32 RandState = 0; + uptr MappedUser = 0; // Bytes mapped for user memory. + uptr AllocatedUser = 0; // Bytes allocated for user memory. 
+ MapPlatformData Data = {}; + ReleaseToOsInfo ReleaseInfo = {}; + bool Exhausted = false; }; struct RegionInfo : UnpaddedRegionInfo { char Padding[SCUDO_CACHE_LINE_SIZE - - (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)]; + (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {}; }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); - uptr PrimaryBase; - MapPlatformData Data; - atomic_s32 ReleaseToOsIntervalMs; + uptr PrimaryBase = 0; + MapPlatformData Data = {}; + atomic_s32 ReleaseToOsIntervalMs = {}; alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; RegionInfo *getRegionInfo(uptr ClassId) { @@ -294,6 +320,14 @@ private: return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog); } + static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { + return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale); + } + + static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) { + return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale); + } + NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) { const uptr Size = getSizeByClassId(ClassId); @@ -303,15 +337,15 @@ private: const uptr MappedUser = Region->MappedUser; const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size; // Map more space for blocks, if necessary. - if (UNLIKELY(TotalUserBytes > MappedUser)) { + if (TotalUserBytes > MappedUser) { // Do the mmap for the user memory. - const uptr UserMapSize = + const uptr MapSize = roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement); const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); - if (RegionBase + MappedUser + UserMapSize > RegionSize) { + if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { if (!Region->Exhausted) { Region->Exhausted = true; - ScopedString Str(1024); + ScopedString Str; getStats(&Str); Str.append( "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", @@ -322,14 +356,15 @@ private: } if (MappedUser == 0) Region->Data = Data; - if (!map(reinterpret_cast<void *>(RegionBeg + MappedUser), UserMapSize, - "scudo:primary", - MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0), - &Region->Data)) + if (UNLIKELY(!map( + reinterpret_cast<void *>(RegionBeg + MappedUser), MapSize, + "scudo:primary", + MAP_ALLOWNOMEM | MAP_RESIZABLE | + (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0), + &Region->Data))) return nullptr; - Region->MappedUser += UserMapSize; - C->getStats().add(StatMapped, UserMapSize); + Region->MappedUser += MapSize; + C->getStats().add(StatMapped, MapSize); } const u32 NumberOfBlocks = Min( @@ -339,17 +374,20 @@ private: constexpr u32 ShuffleArraySize = MaxNumBatches * TransferBatch::MaxNumCached; - void *ShuffleArray[ShuffleArraySize]; + CompactPtrT ShuffleArray[ShuffleArraySize]; DCHECK_LE(NumberOfBlocks, ShuffleArraySize); + const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); uptr P = RegionBeg + Region->AllocatedUser; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) - ShuffleArray[I] = reinterpret_cast<void *>(P); + ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); // No need to shuffle the batches size class. 
if (ClassId != SizeClassMap::BatchClassId) shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState); for (u32 I = 0; I < NumberOfBlocks;) { - TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]); + TransferBatch *B = + C->createBatch(ClassId, reinterpret_cast<void *>(decompactPtrInternal( + CompactPtrBase, ShuffleArray[I]))); if (UNLIKELY(!B)) return nullptr; const u32 N = Min(MaxCount, NumberOfBlocks - I); @@ -409,7 +447,7 @@ private: if (BlockSize < PageSize / 16U) { if (!Force && BytesPushed < Region->AllocatedUser / 16U) return 0; - // We want 8x% to 9x% free bytes (the larger the bock, the lower the %). + // We want 8x% to 9x% free bytes (the larger the block, the lower the %). if ((BytesInFreeList * 100U) / Region->AllocatedUser < (100U - 1U - BlockSize / 16U)) return 0; @@ -426,11 +464,14 @@ private: } } - auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); - releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg, - Region->AllocatedUser, 1U, BlockSize, &Recorder, - SkipRegion); + const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); + auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { + return decompactPtrInternal(CompactPtrBase, CompactPtr); + }; + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U, + BlockSize, &Recorder, DecompactPtr, SkipRegion); if (Recorder.getReleasedRangesCount() > 0) { Region->ReleaseInfo.PushedBlocksAtLastRelease = diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h index 27aa4bfec91a..2d231c3a28db 100644 --- a/compiler-rt/lib/scudo/standalone/quarantine.h +++ b/compiler-rt/lib/scudo/standalone/quarantine.h @@ -64,11 +64,7 @@ static_assert(sizeof(QuarantineBatch) <= (1U << 13), ""); // 8Kb. // Per-thread cache of memory blocks. template <typename Callback> class QuarantineCache { public: - void initLinkerInitialized() {} - void init() { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(); - } + void init() { DCHECK_EQ(atomic_load_relaxed(&Size), 0U); } // Total memory used, including internal accounting. uptr getSize() const { return atomic_load_relaxed(&Size); } @@ -161,7 +157,7 @@ public: private: SinglyLinkedList<QuarantineBatch> List; - atomic_uptr Size; + atomic_uptr Size = {}; void addToSize(uptr add) { atomic_store_relaxed(&Size, getSize() + add); } void subFromSize(uptr sub) { atomic_store_relaxed(&Size, getSize() - sub); } @@ -174,8 +170,13 @@ private: template <typename Callback, typename Node> class GlobalQuarantine { public: typedef QuarantineCache<Callback> CacheT; + using ThisT = GlobalQuarantine<Callback, Node>; - void initLinkerInitialized(uptr Size, uptr CacheSize) { + void init(uptr Size, uptr CacheSize) { + DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); + DCHECK_EQ(atomic_load_relaxed(&MaxSize), 0U); + DCHECK_EQ(atomic_load_relaxed(&MinSize), 0U); + DCHECK_EQ(atomic_load_relaxed(&MaxCacheSize), 0U); // Thread local quarantine size can be zero only when global quarantine size // is zero (it allows us to perform just one atomic read per put() call). CHECK((Size == 0 && CacheSize == 0) || CacheSize != 0); @@ -184,16 +185,7 @@ public: atomic_store_relaxed(&MinSize, Size / 10 * 9); // 90% of max size. 
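    // e.g. with Size = 1 << 20, MinSize becomes 1048576 / 10 * 9 = 943713
    // bytes, so once the global quarantine overflows its maximum, recycle()
    // drains it back down toward the 90% watermark.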
atomic_store_relaxed(&MaxCacheSize, CacheSize); - Cache.initLinkerInitialized(); - } - void init(uptr Size, uptr CacheSize) { - CacheMutex.init(); Cache.init(); - RecycleMutex.init(); - MinSize = {}; - MaxSize = {}; - MaxCacheSize = {}; - initLinkerInitialized(Size, CacheSize); } uptr getMaxSize() const { return atomic_load_relaxed(&MaxSize); } @@ -246,9 +238,9 @@ private: alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex; CacheT Cache; alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex; - atomic_uptr MinSize; - atomic_uptr MaxSize; - alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize; + atomic_uptr MinSize = {}; + atomic_uptr MaxSize = {}; + alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize = {}; void NOINLINE recycle(uptr MinSize, Callback Cb) { CacheT Tmp; diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h index 5c11da2200e9..293a8bc27bab 100644 --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -17,17 +17,19 @@ namespace scudo { class ReleaseRecorder { public: - ReleaseRecorder(uptr BaseAddress, MapPlatformData *Data = nullptr) - : BaseAddress(BaseAddress), Data(Data) {} + ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr) + : Base(Base), Data(Data) {} uptr getReleasedRangesCount() const { return ReleasedRangesCount; } uptr getReleasedBytes() const { return ReleasedBytes; } + uptr getBase() const { return Base; } + // Releases [From, To) range of pages back to OS. void releasePageRangeToOS(uptr From, uptr To) { const uptr Size = To - From; - releasePagesToOS(BaseAddress, From, Size, Data); + releasePagesToOS(Base, From, Size, Data); ReleasedRangesCount++; ReleasedBytes += Size; } @@ -35,7 +37,7 @@ public: private: uptr ReleasedRangesCount = 0; uptr ReleasedBytes = 0; - uptr BaseAddress = 0; + uptr Base = 0; MapPlatformData *Data = nullptr; }; @@ -79,7 +81,8 @@ public: memset(Buffer, 0, BufferSize); } else { Buffer = reinterpret_cast<uptr *>( - map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM)); + map(nullptr, roundUpTo(BufferSize, getPageSizeCached()), + "scudo:counters", MAP_ALLOWNOMEM)); } } ~PackedCounterArray() { @@ -88,7 +91,8 @@ public: if (Buffer == &StaticBuffer[0]) Mutex.unlock(); else - unmap(reinterpret_cast<void *>(Buffer), BufferSize); + unmap(reinterpret_cast<void *>(Buffer), + roundUpTo(BufferSize, getPageSizeCached())); } bool isAllocated() const { return !!Buffer; } @@ -179,11 +183,13 @@ private: uptr CurrentRangeStatePage = 0; }; -template <class TransferBatchT, class ReleaseRecorderT, typename SkipRegionT> +template <class TransferBatchT, class ReleaseRecorderT, typename DecompactPtrT, + typename SkipRegionT> NOINLINE void -releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, +releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr RegionSize, uptr NumberOfRegions, uptr BlockSize, - ReleaseRecorderT *Recorder, SkipRegionT SkipRegion) { + ReleaseRecorderT *Recorder, DecompactPtrT DecompactPtr, + SkipRegionT SkipRegion) { const uptr PageSize = getPageSizeCached(); // Figure out the number of chunks per page and whether we can take a fast @@ -236,9 +242,8 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, // Each chunk affects one page only. for (const auto &It : FreeList) { for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; - // This takes care of P < Base and P >= Base + RoundedSize. 
- if (UNLIKELY(P >= RoundedSize)) + const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); + if (P >= RoundedSize) continue; const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; const uptr PInRegion = P - RegionIndex * RegionSize; @@ -251,9 +256,8 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base, const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize; for (const auto &It : FreeList) { for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base; - // This takes care of P < Base and P >= Base + RoundedSize. - if (UNLIKELY(P >= RoundedSize)) + const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); + if (P >= RoundedSize) continue; const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; uptr PInRegion = P - RegionIndex * RegionSize; diff --git a/compiler-rt/lib/scudo/standalone/report.cpp b/compiler-rt/lib/scudo/standalone/report.cpp index 80cc6eda2af9..561c7c51f4e1 100644 --- a/compiler-rt/lib/scudo/standalone/report.cpp +++ b/compiler-rt/lib/scudo/standalone/report.cpp @@ -17,7 +17,7 @@ namespace scudo { class ScopedErrorReport { public: - ScopedErrorReport() : Message(512) { Message.append("Scudo ERROR: "); } + ScopedErrorReport() : Message() { Message.append("Scudo ERROR: "); } void append(const char *Format, ...) { va_list Args; va_start(Args, Format); @@ -45,8 +45,8 @@ void NORETURN reportCheckFailed(const char *File, int Line, trap(); } ScopedErrorReport Report; - Report.append("CHECK failed @ %s:%d %s (%llu, %llu)\n", File, Line, Condition, - Value1, Value2); + Report.append("CHECK failed @ %s:%d %s ((u64)op1=%llu, (u64)op2=%llu)\n", + File, Line, Condition, Value1, Value2); } // Generic string fatal error message. diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 063640106abb..630e64d46edf 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -9,9 +9,12 @@ #ifndef SCUDO_SECONDARY_H_ #define SCUDO_SECONDARY_H_ +#include "chunk.h" #include "common.h" #include "list.h" +#include "memtag.h" #include "mutex.h" +#include "options.h" #include "stats.h" #include "string_utils.h" @@ -25,42 +28,61 @@ namespace scudo { namespace LargeBlock { -struct Header { +struct alignas(Max<uptr>(archSupportsMemoryTagging() + ? 
archMemoryTagGranuleSize() + : 1, + 1U << SCUDO_MIN_ALIGNMENT_LOG)) Header { LargeBlock::Header *Prev; LargeBlock::Header *Next; - uptr BlockEnd; + uptr CommitBase; + uptr CommitSize; uptr MapBase; uptr MapSize; [[no_unique_address]] MapPlatformData Data; }; -constexpr uptr getHeaderSize() { - return roundUpTo(sizeof(Header), 1U << SCUDO_MIN_ALIGNMENT_LOG); +static_assert(sizeof(Header) % (1U << SCUDO_MIN_ALIGNMENT_LOG) == 0, ""); +static_assert(!archSupportsMemoryTagging() || + sizeof(Header) % archMemoryTagGranuleSize() == 0, + ""); + +constexpr uptr getHeaderSize() { return sizeof(Header); } + +template <typename Config> static uptr addHeaderTag(uptr Ptr) { + if (allocatorSupportsMemoryTagging<Config>()) + return addFixedTag(Ptr, 1); + return Ptr; } -static Header *getHeader(uptr Ptr) { - return reinterpret_cast<Header *>(Ptr - getHeaderSize()); +template <typename Config> static Header *getHeader(uptr Ptr) { + return reinterpret_cast<Header *>(addHeaderTag<Config>(Ptr)) - 1; } -static Header *getHeader(const void *Ptr) { - return getHeader(reinterpret_cast<uptr>(Ptr)); +template <typename Config> static Header *getHeader(const void *Ptr) { + return getHeader<Config>(reinterpret_cast<uptr>(Ptr)); } } // namespace LargeBlock +static void unmap(LargeBlock::Header *H) { + MapPlatformData Data = H->Data; + unmap(reinterpret_cast<void *>(H->MapBase), H->MapSize, UNMAP_ALL, &Data); +} + class MapAllocatorNoCache { public: - void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {} void init(UNUSED s32 ReleaseToOsInterval) {} - bool retrieve(UNUSED uptr Size, UNUSED LargeBlock::Header **H, - UNUSED bool *Zeroed) { + bool retrieve(UNUSED Options Options, UNUSED uptr Size, UNUSED uptr Alignment, + UNUSED LargeBlock::Header **H, UNUSED bool *Zeroed) { return false; } - bool store(UNUSED LargeBlock::Header *H) { return false; } + void store(UNUSED Options Options, LargeBlock::Header *H) { unmap(H); } bool canCache(UNUSED uptr Size) { return false; } void disable() {} void enable() {} void releaseToOS() {} + void disableMemoryTagging() {} + void unmapTestOnly() {} bool setOption(Option O, UNUSED sptr Value) { if (O == Option::ReleaseInterval || O == Option::MaxCacheEntriesCount || O == Option::MaxCacheEntrySize) @@ -70,6 +92,27 @@ public: } }; +static const uptr MaxUnusedCachePages = 4U; + +template <typename Config> +void mapSecondary(Options Options, uptr CommitBase, uptr CommitSize, + uptr AllocPos, uptr Flags, MapPlatformData *Data) { + const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * getPageSizeCached(); + if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) { + const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); + map(reinterpret_cast<void *>(CommitBase), UntaggedPos - CommitBase, + "scudo:secondary", MAP_RESIZABLE | MAP_MEMTAG | Flags, Data); + map(reinterpret_cast<void *>(UntaggedPos), + CommitBase + CommitSize - UntaggedPos, "scudo:secondary", + MAP_RESIZABLE | Flags, Data); + } else { + map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary", + MAP_RESIZABLE | (useMemoryTagging<Config>(Options) ? MAP_MEMTAG : 0) | + Flags, + Data); + } +} + template <typename Config> class MapAllocatorCache { public: // Ensure the default maximum specified fits the array. 
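  // (setOption(Option::MaxCacheEntriesCount) enforces the same bound at
  // runtime; the static_assert below catches a bad compile-time default.)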
@@ -77,79 +120,155 @@ public: Config::SecondaryCacheEntriesArraySize, ""); - void initLinkerInitialized(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) { + DCHECK_EQ(EntriesCount, 0U); setOption(Option::MaxCacheEntriesCount, static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntriesCount)); setOption(Option::MaxCacheEntrySize, static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntrySize)); setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval)); } - void init(s32 ReleaseToOsInterval) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(ReleaseToOsInterval); - } - bool store(LargeBlock::Header *H) { + void store(Options Options, LargeBlock::Header *H) { + if (!canCache(H->CommitSize)) + return unmap(H); + bool EntryCached = false; bool EmptyCache = false; + const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs); const u64 Time = getMonotonicTime(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); - { + CachedBlock Entry; + Entry.CommitBase = H->CommitBase; + Entry.CommitSize = H->CommitSize; + Entry.MapBase = H->MapBase; + Entry.MapSize = H->MapSize; + Entry.BlockBegin = reinterpret_cast<uptr>(H + 1); + Entry.Data = H->Data; + Entry.Time = Time; + if (useMemoryTagging<Config>(Options)) { + if (Interval == 0 && !SCUDO_FUCHSIA) { + // Release the memory and make it inaccessible at the same time by + // creating a new MAP_NOACCESS mapping on top of the existing mapping. + // Fuchsia does not support replacing mappings by creating a new mapping + // on top so we just do the two syscalls there. + Entry.Time = 0; + mapSecondary<Config>(Options, Entry.CommitBase, Entry.CommitSize, + Entry.CommitBase, MAP_NOACCESS, &Entry.Data); + } else { + setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS, + &Entry.Data); + } + } else if (Interval == 0) { + releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.Time = 0; + } + do { ScopedLock L(Mutex); + if (useMemoryTagging<Config>(Options) && QuarantinePos == -1U) { + // If we get here then memory tagging was disabled in between when we + // read Options and when we locked Mutex. We can't insert our entry into + // the quarantine or the cache because the permissions would be wrong so + // just unmap it. 
+ break; + } + if (Config::SecondaryCacheQuarantineSize && + useMemoryTagging<Config>(Options)) { + QuarantinePos = + (QuarantinePos + 1) % Max(Config::SecondaryCacheQuarantineSize, 1u); + if (!Quarantine[QuarantinePos].CommitBase) { + Quarantine[QuarantinePos] = Entry; + return; + } + CachedBlock PrevEntry = Quarantine[QuarantinePos]; + Quarantine[QuarantinePos] = Entry; + if (OldestTime == 0) + OldestTime = Entry.Time; + Entry = PrevEntry; + } if (EntriesCount >= MaxCount) { if (IsFullEvents++ == 4U) EmptyCache = true; } else { for (u32 I = 0; I < MaxCount; I++) { - if (Entries[I].Block) + if (Entries[I].CommitBase) continue; if (I != 0) Entries[I] = Entries[0]; - Entries[0].Block = reinterpret_cast<uptr>(H); - Entries[0].BlockEnd = H->BlockEnd; - Entries[0].MapBase = H->MapBase; - Entries[0].MapSize = H->MapSize; - Entries[0].Data = H->Data; - Entries[0].Time = Time; + Entries[0] = Entry; EntriesCount++; + if (OldestTime == 0) + OldestTime = Entry.Time; EntryCached = true; break; } } - } - s32 Interval; + } while (0); if (EmptyCache) empty(); - else if ((Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs)) >= 0) + else if (Interval >= 0) releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000); - return EntryCached; + if (!EntryCached) + unmap(reinterpret_cast<void *>(Entry.MapBase), Entry.MapSize, UNMAP_ALL, + &Entry.Data); } - bool retrieve(uptr Size, LargeBlock::Header **H, bool *Zeroed) { + bool retrieve(Options Options, uptr Size, uptr Alignment, + LargeBlock::Header **H, bool *Zeroed) { const uptr PageSize = getPageSizeCached(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); - ScopedLock L(Mutex); - if (EntriesCount == 0) - return false; - for (u32 I = 0; I < MaxCount; I++) { - if (!Entries[I].Block) - continue; - const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block; - if (Size > BlockSize) - continue; - if (Size < BlockSize - PageSize * 4U) - continue; - *H = reinterpret_cast<LargeBlock::Header *>(Entries[I].Block); - *Zeroed = Entries[I].Time == 0; - Entries[I].Block = 0; - (*H)->BlockEnd = Entries[I].BlockEnd; - (*H)->MapBase = Entries[I].MapBase; - (*H)->MapSize = Entries[I].MapSize; - (*H)->Data = Entries[I].Data; + bool Found = false; + CachedBlock Entry; + uptr HeaderPos; + { + ScopedLock L(Mutex); + if (EntriesCount == 0) + return false; + for (u32 I = 0; I < MaxCount; I++) { + const uptr CommitBase = Entries[I].CommitBase; + if (!CommitBase) + continue; + const uptr CommitSize = Entries[I].CommitSize; + const uptr AllocPos = + roundDownTo(CommitBase + CommitSize - Size, Alignment); + HeaderPos = + AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + if (HeaderPos > CommitBase + CommitSize) + continue; + if (HeaderPos < CommitBase || + AllocPos > CommitBase + PageSize * MaxUnusedCachePages) + continue; + Found = true; + Entry = Entries[I]; + Entries[I].CommitBase = 0; + break; + } + } + if (Found) { + *H = reinterpret_cast<LargeBlock::Header *>( + LargeBlock::addHeaderTag<Config>(HeaderPos)); + *Zeroed = Entry.Time == 0; + if (useMemoryTagging<Config>(Options)) + setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0, &Entry.Data); + uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1); + if (useMemoryTagging<Config>(Options)) { + if (*Zeroed) + storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase), + NewBlockBegin); + else if (Entry.BlockBegin < NewBlockBegin) + storeTags(Entry.BlockBegin, NewBlockBegin); + else + storeTags(untagPointer(NewBlockBegin), + untagPointer(Entry.BlockBegin)); + } + (*H)->CommitBase = 
Entry.CommitBase; + (*H)->CommitSize = Entry.CommitSize; + (*H)->MapBase = Entry.MapBase; + (*H)->MapSize = Entry.MapSize; + (*H)->Data = Entry.Data; EntriesCount--; - return true; } - return false; + return Found; } bool canCache(uptr Size) { @@ -165,13 +284,15 @@ public: Config::SecondaryCacheMinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; - } else if (O == Option::MaxCacheEntriesCount) { + } + if (O == Option::MaxCacheEntriesCount) { const u32 MaxCount = static_cast<u32>(Value); if (MaxCount > Config::SecondaryCacheEntriesArraySize) return false; atomic_store_relaxed(&MaxEntriesCount, MaxCount); return true; - } else if (O == Option::MaxCacheEntrySize) { + } + if (O == Option::MaxCacheEntrySize) { atomic_store_relaxed(&MaxEntrySize, static_cast<uptr>(Value)); return true; } @@ -181,10 +302,29 @@ public: void releaseToOS() { releaseOlderThan(UINT64_MAX); } + void disableMemoryTagging() { + ScopedLock L(Mutex); + for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) { + if (Quarantine[I].CommitBase) { + unmap(reinterpret_cast<void *>(Quarantine[I].MapBase), + Quarantine[I].MapSize, UNMAP_ALL, &Quarantine[I].Data); + Quarantine[I].CommitBase = 0; + } + } + const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + for (u32 I = 0; I < MaxCount; I++) + if (Entries[I].CommitBase) + setMemoryPermission(Entries[I].CommitBase, Entries[I].CommitSize, 0, + &Entries[I].Data); + QuarantinePos = -1U; + } + void disable() { Mutex.lock(); } void enable() { Mutex.unlock(); } + void unmapTestOnly() { empty(); } + private: void empty() { struct { @@ -196,12 +336,12 @@ private: { ScopedLock L(Mutex); for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) { - if (!Entries[I].Block) + if (!Entries[I].CommitBase) continue; MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase); MapInfo[N].MapSize = Entries[I].MapSize; MapInfo[N].Data = Entries[I].Data; - Entries[I].Block = 0; + Entries[I].CommitBase = 0; N++; } EntriesCount = 0; @@ -212,59 +352,72 @@ private: &MapInfo[I].Data); } - void releaseOlderThan(u64 Time) { - ScopedLock L(Mutex); - if (!EntriesCount) - return; - for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) { - if (!Entries[I].Block || !Entries[I].Time || Entries[I].Time > Time) - continue; - releasePagesToOS(Entries[I].Block, 0, - Entries[I].BlockEnd - Entries[I].Block, - &Entries[I].Data); - Entries[I].Time = 0; - } - } - struct CachedBlock { - uptr Block; - uptr BlockEnd; + uptr CommitBase; + uptr CommitSize; uptr MapBase; uptr MapSize; + uptr BlockBegin; [[no_unique_address]] MapPlatformData Data; u64 Time; }; + void releaseIfOlderThan(CachedBlock &Entry, u64 Time) { + if (!Entry.CommitBase || !Entry.Time) + return; + if (Entry.Time > Time) { + if (OldestTime == 0 || Entry.Time < OldestTime) + OldestTime = Entry.Time; + return; + } + releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.Time = 0; + } + + void releaseOlderThan(u64 Time) { + ScopedLock L(Mutex); + if (!EntriesCount || OldestTime == 0 || OldestTime > Time) + return; + OldestTime = 0; + for (uptr I = 0; I < Config::SecondaryCacheQuarantineSize; I++) + releaseIfOlderThan(Quarantine[I], Time); + for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) + releaseIfOlderThan(Entries[I], Time); + } + HybridMutex Mutex; - CachedBlock Entries[Config::SecondaryCacheEntriesArraySize]; - u32 EntriesCount; - atomic_u32 MaxEntriesCount; - atomic_uptr MaxEntrySize; - uptr LargestSize; - u32 IsFullEvents; - 
atomic_s32 ReleaseToOsIntervalMs; + u32 EntriesCount = 0; + u32 QuarantinePos = 0; + atomic_u32 MaxEntriesCount = {}; + atomic_uptr MaxEntrySize = {}; + u64 OldestTime = 0; + u32 IsFullEvents = 0; + atomic_s32 ReleaseToOsIntervalMs = {}; + + CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {}; + CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize] = {}; }; template <typename Config> class MapAllocator { public: - void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) { - Cache.initLinkerInitialized(ReleaseToOsInterval); - Stats.initLinkerInitialized(); + void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) { + DCHECK_EQ(AllocatedBytes, 0U); + DCHECK_EQ(FreedBytes, 0U); + Cache.init(ReleaseToOsInterval); + Stats.init(); if (LIKELY(S)) S->link(&Stats); } - void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(S, ReleaseToOsInterval); - } - void *allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr, + void *allocate(Options Options, uptr Size, uptr AlignmentHint = 0, + uptr *BlockEnd = nullptr, FillContentsMode FillContents = NoFill); - void deallocate(void *Ptr); + void deallocate(Options Options, void *Ptr); static uptr getBlockEnd(void *Ptr) { - return LargeBlock::getHeader(Ptr)->BlockEnd; + auto *B = LargeBlock::getHeader<Config>(Ptr); + return B->CommitBase + B->CommitSize; } static uptr getBlockSize(void *Ptr) { @@ -284,8 +437,12 @@ public: } template <typename F> void iterateOverBlocks(F Callback) const { - for (const auto &H : InUseBlocks) - Callback(reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize()); + for (const auto &H : InUseBlocks) { + uptr Ptr = reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize(); + if (allocatorSupportsMemoryTagging<Config>()) + Ptr = untagPointer(Ptr); + Callback(Ptr); + } } uptr canCache(uptr Size) { return Cache.canCache(Size); } @@ -294,16 +451,20 @@ public: void releaseToOS() { Cache.releaseToOS(); } + void disableMemoryTagging() { Cache.disableMemoryTagging(); } + + void unmapTestOnly() { Cache.unmapTestOnly(); } + private: typename Config::SecondaryCache Cache; HybridMutex Mutex; DoublyLinkedList<LargeBlock::Header> InUseBlocks; - uptr AllocatedBytes; - uptr FreedBytes; - uptr LargestSize; - u32 NumberOfAllocs; - u32 NumberOfFrees; + uptr AllocatedBytes = 0; + uptr FreedBytes = 0; + uptr LargestSize = 0; + u32 NumberOfAllocs = 0; + u32 NumberOfFrees = 0; LocalStats Stats; }; @@ -319,26 +480,36 @@ private: // the committed memory will amount to something close to Size - AlignmentHint // (pending rounding and headers). 
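To make the new secondary layout concrete before the allocate() definition that follows: the user block now sits at the highest suitably aligned address inside the committed region, and the chunk and LargeBlock headers are packed immediately below the returned pointer. A minimal standalone sketch of the arithmetic (the header sizes here are hypothetical placeholders for Chunk::getHeaderSize() and LargeBlock::getHeaderSize()):

#include <cassert>
#include <cstdint>

using uptr = uintptr_t;

// roundDownTo as used by the secondary; Boundary must be a power of two.
constexpr uptr roundDownTo(uptr X, uptr Boundary) { return X & ~(Boundary - 1); }

int main() {
  const uptr CommitBase = 0x10000, CommitSize = 0x3000; // hypothetical region
  const uptr Size = 0x2345, Alignment = 64;             // hypothetical request
  const uptr ChunkHeader = 16, LargeHeader = 48;        // placeholder sizes
  // User data goes as close to the end of the committed region as the
  // alignment permits; both headers sit right below the returned pointer.
  const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment);
  const uptr HeaderPos = AllocPos - ChunkHeader - LargeHeader;
  assert(HeaderPos >= CommitBase && AllocPos + Size <= CommitBase + CommitSize);
  return 0;
}

This is also the invariant that the cache's retrieve() above checks when deciding whether a cached region can satisfy a request.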
template <typename Config>
-void *MapAllocator<Config>::allocate(uptr Size, uptr AlignmentHint,
- uptr *BlockEnd,
+void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment,
+ uptr *BlockEndPtr,
 FillContentsMode FillContents) {
- DCHECK_GE(Size, AlignmentHint);
+ if (Options.get(OptionBit::AddLargeAllocationSlack))
+ Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG;
+ Alignment = Max(Alignment, 1UL << SCUDO_MIN_ALIGNMENT_LOG);
 const uptr PageSize = getPageSizeCached();
- const uptr RoundedSize =
- roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize);
-
- if (AlignmentHint < PageSize && Cache.canCache(RoundedSize)) {
+ uptr RoundedSize =
+ roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() +
+ Chunk::getHeaderSize(),
+ PageSize);
+ if (Alignment > PageSize)
+ RoundedSize += Alignment - PageSize;
+
+ if (Alignment < PageSize && Cache.canCache(RoundedSize)) {
 LargeBlock::Header *H;
 bool Zeroed;
- if (Cache.retrieve(RoundedSize, &H, &Zeroed)) {
- if (BlockEnd)
- *BlockEnd = H->BlockEnd;
- void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(H) +
- LargeBlock::getHeaderSize());
+ if (Cache.retrieve(Options, Size, Alignment, &H, &Zeroed)) {
+ const uptr BlockEnd = H->CommitBase + H->CommitSize;
+ if (BlockEndPtr)
+ *BlockEndPtr = BlockEnd;
+ uptr HInt = reinterpret_cast<uptr>(H);
+ if (allocatorSupportsMemoryTagging<Config>())
+ HInt = untagPointer(HInt);
+ const uptr PtrInt = HInt + LargeBlock::getHeaderSize();
+ void *Ptr = reinterpret_cast<void *>(PtrInt);
 if (FillContents && !Zeroed)
 memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte,
- H->BlockEnd - reinterpret_cast<uptr>(Ptr));
+ BlockEnd - PtrInt);
+ const uptr BlockSize = BlockEnd - HInt;
 {
 ScopedLock L(Mutex);
 InUseBlocks.push_back(H);
@@ -353,9 +524,8 @@ void *MapAllocator<Config>::allocate(uptr Size, uptr AlignmentHint,
 MapPlatformData Data = {};
 const uptr MapSize = RoundedSize + 2 * PageSize;
- uptr MapBase =
- reinterpret_cast<uptr>(map(nullptr, MapSize, "scudo:secondary",
- MAP_NOACCESS | MAP_ALLOWNOMEM, &Data));
+ uptr MapBase = reinterpret_cast<uptr>(
+ map(nullptr, MapSize, nullptr, MAP_NOACCESS | MAP_ALLOWNOMEM, &Data));
 if (UNLIKELY(!MapBase))
 return nullptr;
 uptr CommitBase = MapBase + PageSize;
@@ -363,11 +533,11 @@
 // In the unlikely event of alignments larger than a page, adjust the amount
 // of memory we want to commit, and trim the extra memory.
- if (UNLIKELY(AlignmentHint >= PageSize)) {
+ if (UNLIKELY(Alignment >= PageSize)) {
 // For alignments greater than or equal to a page, the user pointer (e.g. the
 // pointer that is returned by the C or C++ allocation APIs) ends up on a
 // page boundary, and our headers will live in the preceding page.
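As a worked example with hypothetical numbers: with PageSize == 0x1000 and Alignment == 0x4000, a MapBase of 0x7f0000 gives CommitBase = roundUpTo(0x7f1001, 0x4000) - 0x1000 = 0x7f3000, so CommitBase + PageSize == 0x7f4000 falls on the requested 16 KiB boundary and the page at 0x7f3000 is free to hold the headers.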
- CommitBase = roundUpTo(MapBase + PageSize + 1, AlignmentHint) - PageSize;
+ CommitBase = roundUpTo(MapBase + PageSize + 1, Alignment) - PageSize;
 const uptr NewMapBase = CommitBase - PageSize;
 DCHECK_GE(NewMapBase, MapBase);
 // We only trim the extra memory on 32-bit platforms: 64-bit platforms
@@ -376,9 +546,8 @@
 unmap(reinterpret_cast<void *>(MapBase), NewMapBase - MapBase, 0, &Data);
 MapBase = NewMapBase;
 }
- const uptr NewMapEnd = CommitBase + PageSize +
- roundUpTo((Size - AlignmentHint), PageSize) +
- PageSize;
+ const uptr NewMapEnd =
+ CommitBase + PageSize + roundUpTo(Size, PageSize) + PageSize;
 DCHECK_LE(NewMapEnd, MapEnd);
 if (SCUDO_WORDSIZE == 32U && NewMapEnd != MapEnd) {
 unmap(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd, 0, &Data);
@@ -387,16 +556,22 @@
 }
 const uptr CommitSize = MapEnd - PageSize - CommitBase;
- const uptr Ptr = reinterpret_cast<uptr>(
- map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary",
- MAP_RESIZABLE, &Data));
- LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>(Ptr);
+ const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment);
+ mapSecondary<Config>(Options, CommitBase, CommitSize, AllocPos, 0, &Data);
+ const uptr HeaderPos =
+ AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize();
+ LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>(
+ LargeBlock::addHeaderTag<Config>(HeaderPos));
+ if (useMemoryTagging<Config>(Options))
+ storeTags(LargeBlock::addHeaderTag<Config>(CommitBase),
+ reinterpret_cast<uptr>(H + 1));
 H->MapBase = MapBase;
 H->MapSize = MapEnd - MapBase;
- H->BlockEnd = CommitBase + CommitSize;
+ H->CommitBase = CommitBase;
+ H->CommitSize = CommitSize;
 H->Data = Data;
- if (BlockEnd)
- *BlockEnd = CommitBase + CommitSize;
+ if (BlockEndPtr)
+ *BlockEndPtr = CommitBase + CommitSize;
 {
 ScopedLock L(Mutex);
 InUseBlocks.push_back(H);
@@ -407,13 +582,13 @@ void *MapAllocator<Config>::allocate(uptr Size, uptr AlignmentHint,
 Stats.add(StatAllocated, CommitSize);
 Stats.add(StatMapped, H->MapSize);
 }
- return reinterpret_cast<void *>(Ptr + LargeBlock::getHeaderSize());
+ return reinterpret_cast<void *>(HeaderPos + LargeBlock::getHeaderSize());
 }
-template <typename Config> void MapAllocator<Config>::deallocate(void *Ptr) {
- LargeBlock::Header *H = LargeBlock::getHeader(Ptr);
- const uptr Block = reinterpret_cast<uptr>(H);
- const uptr CommitSize = H->BlockEnd - Block;
+template <typename Config>
+void MapAllocator<Config>::deallocate(Options Options, void *Ptr) {
+ LargeBlock::Header *H = LargeBlock::getHeader<Config>(Ptr);
+ const uptr CommitSize = H->CommitSize;
 {
 ScopedLock L(Mutex);
 InUseBlocks.remove(H);
@@ -422,12 +597,7 @@ template <typename Config> void MapAllocator<Config>::deallocate(void *Ptr) {
 Stats.sub(StatAllocated, CommitSize);
 Stats.sub(StatMapped, H->MapSize);
 }
- if (Cache.canCache(CommitSize) && Cache.store(H))
- return;
- void *Addr = reinterpret_cast<void *>(H->MapBase);
- const uptr Size = H->MapSize;
- MapPlatformData Data = H->Data;
- unmap(Addr, Size, UNMAP_ALL, &Data);
+ Cache.store(Options, H);
 }
 template <typename Config>
diff --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h
index 5ed8e2845b38..ba0f78453bcb 100644
--- a/compiler-rt/lib/scudo/standalone/size_class_map.h
+++ b/compiler-rt/lib/scudo/standalone/size_class_map.h
@@ -64,12 +64,10
@@ class FixedSizeClassMap : public SizeClassMapBase<Config> { static const u8 S = Config::NumBits - 1; static const uptr M = (1UL << S) - 1; - static const uptr SizeDelta = Chunk::getHeaderSize(); - public: static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; - static const uptr MaxSize = (1UL << Config::MaxSizeLog) + SizeDelta; + static const uptr MaxSize = (1UL << Config::MaxSizeLog) + Config::SizeDelta; static const uptr NumClasses = MidClass + ((Config::MaxSizeLog - Config::MidSizeLog) << S) + 1; static_assert(NumClasses <= 256, ""); @@ -79,16 +77,22 @@ public: static uptr getSizeByClassId(uptr ClassId) { DCHECK_NE(ClassId, BatchClassId); if (ClassId <= MidClass) - return (ClassId << Config::MinSizeLog) + SizeDelta; + return (ClassId << Config::MinSizeLog) + Config::SizeDelta; ClassId -= MidClass; const uptr T = MidSize << (ClassId >> S); - return T + (T >> S) * (ClassId & M) + SizeDelta; + return T + (T >> S) * (ClassId & M) + Config::SizeDelta; + } + + static u8 getSizeLSBByClassId(uptr ClassId) { + return u8(getLeastSignificantSetBitIndex(getSizeByClassId(ClassId))); } + static constexpr bool usesCompressedLSBFormat() { return false; } + static uptr getClassIdBySize(uptr Size) { - if (Size <= SizeDelta + (1 << Config::MinSizeLog)) + if (Size <= Config::SizeDelta + (1 << Config::MinSizeLog)) return 1; - Size -= SizeDelta; + Size -= Config::SizeDelta; DCHECK_LE(Size, MaxSize); if (Size <= MidSize) return (Size + MinSize - 1) >> Config::MinSizeLog; @@ -137,7 +141,41 @@ class TableSizeClassMap : public SizeClassMapBase<Config> { u8 Tab[getTableSize()] = {}; }; - static constexpr SizeTable Table = {}; + static constexpr SizeTable SzTable = {}; + + struct LSBTable { + constexpr LSBTable() { + u8 Min = 255, Max = 0; + for (uptr I = 0; I != ClassesSize; ++I) { + for (u8 Bit = 0; Bit != 64; ++Bit) { + if (Config::Classes[I] & (1 << Bit)) { + Tab[I] = Bit; + if (Bit < Min) + Min = Bit; + if (Bit > Max) + Max = Bit; + break; + } + } + } + + if (Max - Min > 3 || ClassesSize > 32) + return; + + UseCompressedFormat = true; + CompressedMin = Min; + for (uptr I = 0; I != ClassesSize; ++I) + CompressedValue |= u64(Tab[I] - Min) << (I * 2); + } + + u8 Tab[ClassesSize] = {}; + + bool UseCompressedFormat = false; + u8 CompressedMin = 0; + u64 CompressedValue = 0; + }; + + static constexpr LSBTable LTable = {}; public: static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; @@ -152,6 +190,18 @@ public: return Config::Classes[ClassId - 1]; } + static u8 getSizeLSBByClassId(uptr ClassId) { + if (LTable.UseCompressedFormat) + return ((LTable.CompressedValue >> ((ClassId - 1) * 2)) & 3) + + LTable.CompressedMin; + else + return LTable.Tab[ClassId - 1]; + } + + static constexpr bool usesCompressedLSBFormat() { + return LTable.UseCompressedFormat; + } + static uptr getClassIdBySize(uptr Size) { if (Size <= Config::Classes[0]) return 1; @@ -159,7 +209,7 @@ public: DCHECK_LE(Size, MaxSize); if (Size <= (1 << Config::MidSizeLog)) return ((Size - 1) >> Config::MinSizeLog) + 1; - return Table.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; + return SzTable.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; } static u32 getMaxCachedHint(uptr Size) { @@ -168,13 +218,37 @@ public: } }; +struct DefaultSizeClassConfig { + static const uptr NumBits = 3; + static const uptr MinSizeLog = 5; + static const uptr MidSizeLog = 8; + static const uptr MaxSizeLog = 17; + static const u32 MaxNumCachedHint = 14; + static const uptr MaxBytesCachedLog = 10; + static const uptr SizeDelta = 0; +}; + +typedef 
FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap; + +struct FuchsiaSizeClassConfig { + static const uptr NumBits = 3; + static const uptr MinSizeLog = 5; + static const uptr MidSizeLog = 8; + static const uptr MaxSizeLog = 17; + static const u32 MaxNumCachedHint = 10; + static const uptr MaxBytesCachedLog = 10; + static const uptr SizeDelta = Chunk::getHeaderSize(); +}; + +typedef FixedSizeClassMap<FuchsiaSizeClassConfig> FuchsiaSizeClassMap; + struct AndroidSizeClassConfig { #if SCUDO_WORDSIZE == 64U static const uptr NumBits = 7; static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 6; static const uptr MaxSizeLog = 16; - static const u32 MaxNumCachedHint = 14; + static const u32 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 13; static constexpr u32 Classes[] = { @@ -208,16 +282,9 @@ struct AndroidSizeClassConfig { typedef TableSizeClassMap<AndroidSizeClassConfig> AndroidSizeClassMap; -struct DefaultSizeClassConfig { - static const uptr NumBits = 3; - static const uptr MinSizeLog = 5; - static const uptr MidSizeLog = 8; - static const uptr MaxSizeLog = 17; - static const u32 MaxNumCachedHint = 8; - static const uptr MaxBytesCachedLog = 10; -}; - -typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap; +#if SCUDO_WORDSIZE == 64U && defined(__clang__) +static_assert(AndroidSizeClassMap::usesCompressedLSBFormat(), ""); +#endif struct SvelteSizeClassConfig { #if SCUDO_WORDSIZE == 64U @@ -225,22 +292,38 @@ struct SvelteSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 4; + static const u32 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 10; + static const uptr SizeDelta = Chunk::getHeaderSize(); #else static const uptr NumBits = 4; static const uptr MinSizeLog = 3; static const uptr MidSizeLog = 7; static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 5; + static const u32 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 10; + static const uptr SizeDelta = Chunk::getHeaderSize(); #endif }; typedef FixedSizeClassMap<SvelteSizeClassConfig> SvelteSizeClassMap; +// Trusty is configured to only have one region containing blocks of size +// 2^7 bytes. 
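Plugging the parameters of the config declared next into the FixedSizeClassMap formulas above confirms the single-class claim. A quick standalone check (a sketch that mirrors the template arithmetic rather than instantiating it):

#include <cassert>
#include <cstdint>
using uptr = uintptr_t;

int main() {
  // TrustySizeClassConfig parameters (see the struct below).
  const uptr NumBits = 1, MinSizeLog = 7, MidSizeLog = 7, MaxSizeLog = 7;
  const uptr S = NumBits - 1;                              // 0
  const uptr MidClass = (1UL << MidSizeLog) >> MinSizeLog; // 128 / 128 == 1
  // One slot for the internal batch class plus a single 128-byte class.
  const uptr NumClasses = MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1;
  assert(NumClasses == 2);
  return 0;
}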
+struct TrustySizeClassConfig { + static const uptr NumBits = 1; + static const uptr MinSizeLog = 7; + static const uptr MidSizeLog = 7; + static const uptr MaxSizeLog = 7; + static const u32 MaxNumCachedHint = 8; + static const uptr MaxBytesCachedLog = 10; + static const uptr SizeDelta = 0; +}; + +typedef FixedSizeClassMap<TrustySizeClassConfig> TrustySizeClassMap; + template <typename SCMap> inline void printMap() { - ScopedString Buffer(1024); + ScopedString Buffer; uptr PrevS = 0; uptr TotalCached = 0; for (uptr I = 0; I < SCMap::NumClasses; I++) { diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h index 7968f7efff7c..458198fcb7aa 100644 --- a/compiler-rt/lib/scudo/standalone/stack_depot.h +++ b/compiler-rt/lib/scudo/standalone/stack_depot.h @@ -40,7 +40,7 @@ public: class StackDepot { HybridMutex RingEndMu; - u32 RingEnd; + u32 RingEnd = 0; // This data structure stores a stack trace for each allocation and // deallocation when stack trace recording is enabled, that may be looked up @@ -70,7 +70,7 @@ class StackDepot { #endif static const uptr TabSize = 1 << TabBits; static const uptr TabMask = TabSize - 1; - atomic_u32 Tab[TabSize]; + atomic_u32 Tab[TabSize] = {}; #ifdef SCUDO_FUZZ static const uptr RingBits = 4; @@ -79,7 +79,7 @@ class StackDepot { #endif static const uptr RingSize = 1 << RingBits; static const uptr RingMask = RingSize - 1; - atomic_u64 Ring[RingSize]; + atomic_u64 Ring[RingSize] = {}; public: // Insert hash of the stack trace [Begin, End) into the stack depot, and diff --git a/compiler-rt/lib/scudo/standalone/stats.h b/compiler-rt/lib/scudo/standalone/stats.h index d76b904949ea..be5bf2d3720a 100644 --- a/compiler-rt/lib/scudo/standalone/stats.h +++ b/compiler-rt/lib/scudo/standalone/stats.h @@ -29,8 +29,10 @@ typedef uptr StatCounters[StatCount]; // LocalStats::add'ing, this is OK, we will still get a meaningful number. class LocalStats { public: - void initLinkerInitialized() {} - void init() { memset(this, 0, sizeof(*this)); } + void init() { + for (uptr I = 0; I < StatCount; I++) + DCHECK_EQ(get(static_cast<StatType>(I)), 0U); + } void add(StatType I, uptr V) { V += atomic_load_relaxed(&StatsArray[I]); @@ -46,23 +48,17 @@ public: uptr get(StatType I) const { return atomic_load_relaxed(&StatsArray[I]); } - LocalStats *Next; - LocalStats *Prev; + LocalStats *Next = nullptr; + LocalStats *Prev = nullptr; private: - atomic_uptr StatsArray[StatCount]; + atomic_uptr StatsArray[StatCount] = {}; }; // Global stats, used for aggregation and querying. class GlobalStats : public LocalStats { public: - void initLinkerInitialized() {} - void init() { - LocalStats::init(); - Mutex.init(); - StatsList = {}; - initLinkerInitialized(); - } + void init() { LocalStats::init(); } void link(LocalStats *S) { ScopedLock L(Mutex); @@ -89,8 +85,11 @@ public: S[I] = static_cast<sptr>(S[I]) >= 0 ? 
S[I] : 0; } - void disable() { Mutex.lock(); } - void enable() { Mutex.unlock(); } + void lock() { Mutex.lock(); } + void unlock() { Mutex.unlock(); } + + void disable() { lock(); } + void enable() { unlock(); } private: mutable HybridMutex Mutex; diff --git a/compiler-rt/lib/scudo/standalone/string_utils.cpp b/compiler-rt/lib/scudo/standalone/string_utils.cpp index f304491019b2..acf85889fcff 100644 --- a/compiler-rt/lib/scudo/standalone/string_utils.cpp +++ b/compiler-rt/lib/scudo/standalone/string_utils.cpp @@ -115,8 +115,8 @@ static int appendPointer(char **Buffer, const char *BufferEnd, u64 ptr_value) { return Res; } -int formatString(char *Buffer, uptr BufferLength, const char *Format, - va_list Args) { +static int formatString(char *Buffer, uptr BufferLength, const char *Format, + va_list Args) { static const char *PrintfFormatsHelp = "Supported formatString formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; " "%[-]([0-9]*)?(\\.\\*)?s; %c\n"; @@ -210,8 +210,15 @@ int formatString(char *Buffer, uptr BufferLength, const char *Format, return Res; } +int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) { + va_list Args; + va_start(Args, Format); + int Res = formatString(Buffer, BufferLength, Format, Args); + va_end(Args); + return Res; +} + void ScopedString::append(const char *Format, va_list Args) { - DCHECK_LT(Length, String.size()); va_list ArgsCopy; va_copy(ArgsCopy, Args); // formatString doesn't currently support a null buffer or zero buffer length, @@ -220,11 +227,13 @@ void ScopedString::append(const char *Format, va_list Args) { char C[1]; const uptr AdditionalLength = static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1; + const uptr Length = length(); String.resize(Length + AdditionalLength); - formatString(String.data() + Length, AdditionalLength, Format, ArgsCopy); + const uptr FormattedLength = static_cast<uptr>(formatString( + String.data() + Length, String.size() - Length, Format, ArgsCopy)); + RAW_CHECK(data()[length()] == '\0'); + RAW_CHECK(FormattedLength + 1 == AdditionalLength); va_end(ArgsCopy); - Length = strlen(String.data()); - CHECK_LT(Length, String.size()); } FORMAT(2, 3) @@ -239,7 +248,7 @@ FORMAT(1, 2) void Printf(const char *Format, ...) 
{ va_list Args; va_start(Args, Format); - ScopedString Msg(1024); + ScopedString Msg; Msg.append(Format, Args); outputRaw(Msg.data()); va_end(Args); diff --git a/compiler-rt/lib/scudo/standalone/string_utils.h b/compiler-rt/lib/scudo/standalone/string_utils.h index acd60bda9d8d..06d23d42246d 100644 --- a/compiler-rt/lib/scudo/standalone/string_utils.h +++ b/compiler-rt/lib/scudo/standalone/string_utils.h @@ -18,14 +18,12 @@ namespace scudo { class ScopedString { public: - explicit ScopedString(uptr MaxLength) : String(MaxLength), Length(0) { - String[0] = '\0'; - } - uptr length() { return Length; } + explicit ScopedString() { String.push_back('\0'); } + uptr length() { return String.size() - 1; } const char *data() { return String.data(); } void clear() { - String[0] = '\0'; - Length = 0; + String.clear(); + String.push_back('\0'); } void append(const char *Format, va_list Args); void append(const char *Format, ...); @@ -33,9 +31,9 @@ public: private: Vector<char> String; - uptr Length; }; +int formatString(char *Buffer, uptr BufferLength, const char *Format, ...); void Printf(const char *Format, ...); } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/trusty.cpp b/compiler-rt/lib/scudo/standalone/trusty.cpp new file mode 100644 index 000000000000..81d6bc585f09 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/trusty.cpp @@ -0,0 +1,100 @@ +//===-- trusty.cpp ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_TRUSTY + +#include "common.h" +#include "mutex.h" +#include "string_utils.h" +#include "trusty.h" + +#include <errno.h> // for errno +#include <stdio.h> // for printf() +#include <stdlib.h> // for getenv() +#include <sys/auxv.h> // for getauxval() +#include <time.h> // for clock_gettime() +#include <trusty_syscalls.h> // for _trusty_brk() + +#define SBRK_ALIGN 32 + +namespace scudo { + +uptr getPageSize() { return getauxval(AT_PAGESZ); } + +void NORETURN die() { abort(); } + +void *map(UNUSED void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, + UNUSED MapPlatformData *Data) { + // Calling _trusty_brk(0) returns the current program break. + uptr ProgramBreak = reinterpret_cast<uptr>(_trusty_brk(0)); + uptr Start; + uptr End; + + Start = roundUpTo(ProgramBreak, SBRK_ALIGN); + // Don't actually extend the heap if MAP_NOACCESS flag is set since this is + // the case where Scudo tries to reserve a memory region without mapping + // physical pages. + if (Flags & MAP_NOACCESS) + return reinterpret_cast<void *>(Start); + + // Attempt to extend the heap by Size bytes using _trusty_brk. + End = roundUpTo(Start + Size, SBRK_ALIGN); + ProgramBreak = + reinterpret_cast<uptr>(_trusty_brk(reinterpret_cast<void *>(End))); + if (ProgramBreak < End) { + errno = ENOMEM; + dieOnMapUnmapError(Size); + return nullptr; + } + return reinterpret_cast<void *>(Start); // Base of new reserved region. +} + +// Unmap is a no-op since Trusty uses sbrk instead of memory mapping. 
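Before the no-op unmap() that follows, it may help to see the break-pointer pattern map() uses in portable form. A hedged sketch with POSIX sbrk() standing in for _trusty_brk() (not Trusty code; error handling reduced to a null return):

#include <cstdint>
#include <unistd.h> // sbrk(), standing in for _trusty_brk()

using uptr = uintptr_t;
constexpr uptr SbrkAlign = 32;

constexpr uptr roundUpTo(uptr X, uptr Boundary) {
  return (X + Boundary - 1) & ~(Boundary - 1);
}

// Reserve Size bytes by pushing the program break forward. Freed memory is
// never handed back, which is exactly why unmap() can be a no-op.
void *mapViaBrk(uptr Size) {
  const uptr Break = reinterpret_cast<uptr>(sbrk(0));
  const uptr Start = roundUpTo(Break, SbrkAlign);
  const uptr End = roundUpTo(Start + Size, SbrkAlign);
  if (sbrk(static_cast<intptr_t>(End - Break)) == reinterpret_cast<void *>(-1))
    return nullptr; // out of heap
  return reinterpret_cast<void *>(Start);
}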
+void unmap(UNUSED void *Addr, UNUSED uptr Size, UNUSED uptr Flags,
+ UNUSED MapPlatformData *Data) {}
+
+void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags,
+ UNUSED MapPlatformData *Data) {}
+
+void releasePagesToOS(UNUSED uptr BaseAddress, UNUSED uptr Offset,
+ UNUSED uptr Size, UNUSED MapPlatformData *Data) {}
+
+const char *getEnv(const char *Name) { return getenv(Name); }
+
+// All mutex operations are no-ops since Trusty doesn't currently support
+// threads.
+bool HybridMutex::tryLock() { return true; }
+
+void HybridMutex::lockSlow() {}
+
+void HybridMutex::unlock() {}
+
+u64 getMonotonicTime() {
+ timespec TS;
+ clock_gettime(CLOCK_MONOTONIC, &TS);
+ return static_cast<u64>(TS.tv_sec) * (1000ULL * 1000 * 1000) +
+ static_cast<u64>(TS.tv_nsec);
+}
+
+u32 getNumberOfCPUs() { return 0; }
+
+u32 getThreadID() { return 0; }
+
+bool getRandom(UNUSED void *Buffer, UNUSED uptr Length, UNUSED bool Blocking) {
+ return false;
+}
+
+void outputRaw(const char *Buffer) { printf("%s", Buffer); }
+
+void setAbortMessage(UNUSED const char *Message) {}
+
+} // namespace scudo
+
+#endif // SCUDO_TRUSTY
diff --git a/compiler-rt/lib/scudo/standalone/trusty.h b/compiler-rt/lib/scudo/standalone/trusty.h
new file mode 100644
index 000000000000..50edd1c6fe63
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/trusty.h
@@ -0,0 +1,24 @@
+//===-- trusty.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TRUSTY_H_
+#define SCUDO_TRUSTY_H_
+
+#include "platform.h"
+
+#if SCUDO_TRUSTY
+
+namespace scudo {
+// MapPlatformData is unused on Trusty; define it as a minimally sized
+// structure.
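Empty as the struct declared next is, it still has sizeof == 1 in C++, which is why holders such as CachedBlock above mark their MapPlatformData member [[no_unique_address]]. A small illustration (typical ABI behavior; the attribute permits, rather than guarantees, the overlap):

#include <cstdio>

struct Empty {};
struct Plain { long L; Empty E; };                        // usually 16 bytes
struct Shrunk { long L; [[no_unique_address]] Empty E; }; // usually 8 bytes

int main() {
  // On common ABIs the attributed empty member overlaps with padding
  // instead of occupying its own byte.
  std::printf("%zu %zu\n", sizeof(Plain), sizeof(Shrunk)); // e.g. "16 8"
  return 0;
}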
+struct MapPlatformData {}; +} // namespace scudo + +#endif // SCUDO_TRUSTY + +#endif // SCUDO_TRUSTY_H_ diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h index b3701c63f8a9..b400a3b56da9 100644 --- a/compiler-rt/lib/scudo/standalone/tsd.h +++ b/compiler-rt/lib/scudo/standalone/tsd.h @@ -26,16 +26,15 @@ namespace scudo { template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { typename Allocator::CacheT Cache; typename Allocator::QuarantineCacheT QuarantineCache; - u8 DestructorIterations; + using ThisT = TSD<Allocator>; + u8 DestructorIterations = 0; - void initLinkerInitialized(Allocator *Instance) { + void init(Allocator *Instance) { + DCHECK_EQ(DestructorIterations, 0U); + DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT))); Instance->initCache(&Cache); DestructorIterations = PTHREAD_DESTRUCTOR_ITERATIONS; } - void init(Allocator *Instance) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(Instance); - } void commitBack(Allocator *Instance) { Instance->commitBack(this); } @@ -59,7 +58,7 @@ template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { private: HybridMutex Mutex; - atomic_uptr Precedence; + atomic_uptr Precedence = {}; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h index 1704c8cf80d8..bba0c277c6a7 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h +++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h @@ -25,18 +25,36 @@ struct ThreadState { template <class Allocator> void teardownThread(void *Ptr); template <class Allocator> struct TSDRegistryExT { - void initLinkerInitialized(Allocator *Instance) { - Instance->initLinkerInitialized(); + void init(Allocator *Instance) { + DCHECK(!Initialized); + Instance->init(); CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0); - FallbackTSD.initLinkerInitialized(Instance); + FallbackTSD.init(Instance); Initialized = true; } - void init(Allocator *Instance) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(Instance); + + void initOnceMaybe(Allocator *Instance) { + ScopedLock L(Mutex); + if (LIKELY(Initialized)) + return; + init(Instance); // Sets Initialized. } - void unmapTestOnly() {} + void unmapTestOnly(Allocator *Instance) { + DCHECK(Instance); + if (reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey))) { + DCHECK_EQ(reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey)), + Instance); + ThreadTSD.commitBack(Instance); + ThreadTSD = {}; + } + CHECK_EQ(pthread_key_delete(PThreadKey), 0); + PThreadKey = {}; + FallbackTSD.commitBack(Instance); + FallbackTSD = {}; + State = {}; + Initialized = false; + } ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) { if (LIKELY(State.InitState != ThreadState::NotInitialized)) @@ -80,13 +98,6 @@ template <class Allocator> struct TSDRegistryExT { bool getDisableMemInit() { return State.DisableMemInit; } private: - void initOnceMaybe(Allocator *Instance) { - ScopedLock L(Mutex); - if (LIKELY(Initialized)) - return; - initLinkerInitialized(Instance); // Sets Initialized. - } - // Using minimal initialization allows for global initialization while keeping // the thread specific structure untouched. The fallback structure will be // used instead. 
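A pattern worth calling out, since this hunk repeats it for both TSD registries: initLinkerInitialized()/memset(this, 0, ...) pairs give way to default member initializers plus init-time DCHECKs. Besides being less error-prone than memset on a live object, this keeps the types constant-initializable, which the SCUDO_REQUIRE_CONSTANT_INITIALIZATION annotations later in the patch rely on. A hedged sketch of the idea (illustrative names, not scudo's):

#include <atomic>

struct Registry {
  bool Initialized = false;
  std::atomic<unsigned> Disabled{0};

  void init() {
    // Members are already zeroed by constant initialization, so init() only
    // performs the work that genuinely needs to run at first use.
    Initialized = true;
  }
};

// C++20 spelling of the requirement; scudo wraps the equivalent compiler
// attribute in SCUDO_REQUIRE_CONSTANT_INITIALIZATION.
constinit Registry GlobalRegistry;

The matching teardown is visible throughout the hunk: unmapTestOnly() restores the pristine state by assigning from a value-initialized temporary (ThreadTSD = {}, FallbackTSD = {}, State = {}).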
@@ -96,14 +107,14 @@ private: return; CHECK_EQ( pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0); - ThreadTSD.initLinkerInitialized(Instance); + ThreadTSD.init(Instance); State.InitState = ThreadState::Initialized; Instance->callPostInitCallback(); } - pthread_key_t PThreadKey; - bool Initialized; - atomic_u8 Disabled; + pthread_key_t PThreadKey = {}; + bool Initialized = false; + atomic_u8 Disabled = {}; TSD<Allocator> FallbackTSD; HybridMutex Mutex; static thread_local ThreadState State; diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h index 6a68b3ef5453..1c2a880416b9 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -24,21 +24,32 @@ namespace scudo { template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount> struct TSDRegistrySharedT { - void initLinkerInitialized(Allocator *Instance) { - Instance->initLinkerInitialized(); + void init(Allocator *Instance) { + DCHECK(!Initialized); + Instance->init(); for (u32 I = 0; I < TSDsArraySize; I++) - TSDs[I].initLinkerInitialized(Instance); + TSDs[I].init(Instance); const u32 NumberOfCPUs = getNumberOfCPUs(); setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount : Min(NumberOfCPUs, DefaultTSDCount)); Initialized = true; } - void init(Allocator *Instance) { - memset(this, 0, sizeof(*this)); - initLinkerInitialized(Instance); + + void initOnceMaybe(Allocator *Instance) { + ScopedLock L(Mutex); + if (LIKELY(Initialized)) + return; + init(Instance); // Sets Initialized. } - void unmapTestOnly() { setCurrentTSD(nullptr); } + void unmapTestOnly(Allocator *Instance) { + for (u32 I = 0; I < TSDsArraySize; I++) { + TSDs[I].commitBack(Instance); + TSDs[I] = {}; + } + setCurrentTSD(nullptr); + Initialized = false; + } ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, UNUSED bool MinimalInit) { @@ -139,13 +150,6 @@ private: *getTlsPtr() |= B; } - void initOnceMaybe(Allocator *Instance) { - ScopedLock L(Mutex); - if (LIKELY(Initialized)) - return; - initLinkerInitialized(Instance); // Sets Initialized. - } - NOINLINE void initThread(Allocator *Instance) { initOnceMaybe(Instance); // Initial context assignment is done in a plain round-robin fashion. @@ -197,11 +201,11 @@ private: return CurrentTSD; } - atomic_u32 CurrentIndex; - u32 NumberOfTSDs; - u32 NumberOfCoPrimes; - u32 CoPrimes[TSDsArraySize]; - bool Initialized; + atomic_u32 CurrentIndex = {}; + u32 NumberOfTSDs = 0; + u32 NumberOfCoPrimes = 0; + u32 CoPrimes[TSDsArraySize] = {}; + bool Initialized = false; HybridMutex Mutex; HybridMutex MutexTSDs; TSD<Allocator> TSDs[TSDsArraySize]; diff --git a/compiler-rt/lib/scudo/standalone/vector.h b/compiler-rt/lib/scudo/standalone/vector.h index 6ca350a25771..2c9a6e2aa655 100644 --- a/compiler-rt/lib/scudo/standalone/vector.h +++ b/compiler-rt/lib/scudo/standalone/vector.h @@ -19,14 +19,13 @@ namespace scudo { // small vectors. The current implementation supports only POD types. 
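The rewrite of the class below amounts to a small-buffer optimization: storage initially comes from a 256-byte buffer inside the object, and map() is only needed once the contents outgrow it, so short-lived strings and vectors never touch the OS. In miniature (a simplified sketch using new/delete in place of scudo's map/unmap):

#include <cstddef>
#include <cstring>

// Simplified small-buffer vector; assumes a POD, non-overaligned T with
// sizeof(T) <= 256, like scudo's VectorNoCtor.
template <typename T> class SmallVec {
public:
  SmallVec()
      : Data(reinterpret_cast<T *>(LocalData)),
        Capacity(sizeof(LocalData) / sizeof(T)) {}
  ~SmallVec() {
    if (!isInline())
      delete[] reinterpret_cast<char *>(Data);
  }
  void push_back(const T &V) {
    if (Size == Capacity)
      grow(2 * Capacity);
    Data[Size++] = V; // plain assignment: POD-only, as in scudo's vector
  }
  T &operator[](size_t I) { return Data[I]; }
  size_t size() const { return Size; }

private:
  bool isInline() const {
    return Data == reinterpret_cast<const T *>(LocalData);
  }
  void grow(size_t NewCapacity) {
    T *NewData = reinterpret_cast<T *>(new char[NewCapacity * sizeof(T)]);
    std::memcpy(NewData, Data, Size * sizeof(T));
    if (!isInline())
      delete[] reinterpret_cast<char *>(Data);
    Data = NewData;
    Capacity = NewCapacity;
  }

  alignas(T) char LocalData[256]; // inline storage: no mapping until outgrown
  T *Data;
  size_t Capacity;
  size_t Size = 0;
};

This is also what lets ScopedString drop its explicit capacity argument in the string_utils.h change above: a default-constructed Vector is usable without any mapping having taken place.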
template <typename T> class VectorNoCtor { public: - void init(uptr InitialCapacity) { - CapacityBytes = 0; - Size = 0; - Data = nullptr; + void init(uptr InitialCapacity = 0) { + Data = reinterpret_cast<T *>(&LocalData[0]); + CapacityBytes = sizeof(LocalData); reserve(InitialCapacity); } void destroy() { - if (Data) + if (Data != reinterpret_cast<T *>(&LocalData[0])) unmap(Data, CapacityBytes); } T &operator[](uptr I) { @@ -82,26 +81,24 @@ private: void reallocate(uptr NewCapacity) { DCHECK_GT(NewCapacity, 0); DCHECK_LE(Size, NewCapacity); - const uptr NewCapacityBytes = - roundUpTo(NewCapacity * sizeof(T), getPageSizeCached()); + NewCapacity = roundUpTo(NewCapacity * sizeof(T), getPageSizeCached()); T *NewData = - reinterpret_cast<T *>(map(nullptr, NewCapacityBytes, "scudo:vector")); - if (Data) { - memcpy(NewData, Data, Size * sizeof(T)); - unmap(Data, CapacityBytes); - } + reinterpret_cast<T *>(map(nullptr, NewCapacity, "scudo:vector")); + memcpy(NewData, Data, Size * sizeof(T)); + destroy(); Data = NewData; - CapacityBytes = NewCapacityBytes; + CapacityBytes = NewCapacity; } - T *Data; - uptr CapacityBytes; - uptr Size; + T *Data = nullptr; + u8 LocalData[256] = {}; + uptr CapacityBytes = 0; + uptr Size = 0; }; template <typename T> class Vector : public VectorNoCtor<T> { public: - Vector() { VectorNoCtor<T>::init(1); } + Vector() { VectorNoCtor<T>::init(); } explicit Vector(uptr Count) { VectorNoCtor<T>::init(Count); this->resize(Count); diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp index 098cc089a1ca..81c7dd60ee33 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp @@ -26,6 +26,7 @@ extern "C" void SCUDO_PREFIX(malloc_postinit)(); // Export the static allocator so that the C++ wrappers can access it. // Technically we could have a completely separated heap for C & C++ but in // reality the amount of cross pollination between the two is staggering. +SCUDO_REQUIRE_CONSTANT_INITIALIZATION scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> SCUDO_ALLOCATOR; #include "wrappers_c.inc" diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc index 9d640038d8e2..43efb02cb860 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -260,4 +260,12 @@ SCUDO_PREFIX(malloc_set_pattern_fill_contents)(int pattern_fill_contents) { pattern_fill_contents ? scudo::PatternOrZeroFill : scudo::NoFill); } +// Sets whether scudo adds a small amount of slack at the end of large +// allocations, before the guard page. This can be enabled to work around buggy +// applications that read a few bytes past the end of their allocation. 
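The toggle defined next would typically be flipped around a known-bad workload. A usage sketch (hedged: the exported symbol shown assumes the default scudo_ expansion of SCUDO_PREFIX):

extern "C" void scudo_malloc_set_add_large_allocation_slack(int);

// Hypothetical consumer: tolerate a library that overreads its buffers while
// it runs. With slack enabled, reads of up to 1 << SCUDO_MIN_ALIGNMENT_LOG
// bytes past the end of a large allocation no longer hit the guard page.
void loadOverreadingPlugin() {
  scudo_malloc_set_add_large_allocation_slack(1);
  // ... dlopen() and the large allocations made by the plugin ...
  scudo_malloc_set_add_large_allocation_slack(0);
}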
+INTERFACE WEAK void
+SCUDO_PREFIX(malloc_set_add_large_allocation_slack)(int add_slack) {
+ SCUDO_ALLOCATOR.setAddLargeAllocationSlack(add_slack);
+}
+
 } // extern "C"
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
index 4298e69b5774..18c3bf2c0edf 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
@@ -23,6 +23,7 @@
 #define SCUDO_ALLOCATOR Allocator
 extern "C" void SCUDO_PREFIX(malloc_postinit)();
+SCUDO_REQUIRE_CONSTANT_INITIALIZATION
 static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)>
 SCUDO_ALLOCATOR;
@@ -36,6 +37,7 @@
 #define SCUDO_ALLOCATOR SvelteAllocator
 extern "C" void SCUDO_PREFIX(malloc_postinit)();
+SCUDO_REQUIRE_CONSTANT_INITIALIZATION
 static scudo::Allocator<scudo::AndroidSvelteConfig,
 SCUDO_PREFIX(malloc_postinit)>
 SCUDO_ALLOCATOR;
@@ -48,12 +50,15 @@
 // TODO(kostyak): support both allocators.
 INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
-INTERFACE void __scudo_get_error_info(
- struct scudo_error_info *error_info, uintptr_t fault_addr,
- const char *stack_depot, const char *region_info, const char *memory,
- const char *memory_tags, uintptr_t memory_addr, size_t memory_size) {
+INTERFACE void
+__scudo_get_error_info(struct scudo_error_info *error_info,
+ uintptr_t fault_addr, const char *stack_depot,
+ const char *region_info, const char *ring_buffer,
+ const char *memory, const char *memory_tags,
+ uintptr_t memory_addr, size_t memory_size) {
 Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info,
- memory, memory_tags, memory_addr, memory_size);
+ ring_buffer, memory, memory_tags, memory_addr,
+ memory_size);
 }
 INTERFACE const char *__scudo_get_stack_depot_addr() {
@@ -72,4 +77,12 @@
 INTERFACE size_t __scudo_get_region_info_size() {
 return Allocator.getRegionInfoArraySize();
 }
+INTERFACE const char *__scudo_get_ring_buffer_addr() {
+ return Allocator.getRingBufferAddress();
+}
+
+INTERFACE size_t __scudo_get_ring_buffer_size() {
+ return Allocator.getRingBufferSize();
+}
+
 #endif // SCUDO_ANDROID && _BIONIC
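For context on the two accessors just added: like the stack depot and region info, the ring buffer is intended to be copied out of a crashing process and handed back to __scudo_get_error_info() by an out-of-process tool. A hedged sketch of the consuming side (the copy mechanism is a hypothetical callback):

#include <stddef.h>

extern "C" {
const char *__scudo_get_ring_buffer_addr();
size_t __scudo_get_ring_buffer_size();
}

// Snapshot the ring buffer region for a later out-of-process
// __scudo_get_error_info() call; saveRegion() is a hypothetical helper that
// copies the bytes somewhere a crash reporter can reach them.
void snapshotRingBuffer(void (*saveRegion)(const char *, size_t)) {
  saveRegion(__scudo_get_ring_buffer_addr(), __scudo_get_ring_buffer_size());
}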