diff --git a/stl/inc/algorithm b/stl/inc/algorithm
index 6b1e1d99d0..d52591085a 100644
--- a/stl/inc/algorithm
+++ b/stl/inc/algorithm
@@ -5118,7 +5118,9 @@ namespace ranges {
 
                 return {_STD move(_First), _STD move(_Final)};
             } else {
-                auto [_Mid_first, _Mid_last] = _Reverse_until_mid_unchecked(_STD move(_First), _Mid, _Final);
+                const auto _Result = _Reverse_until_mid_unchecked(_STD move(_First), _Mid, _Final);
+                auto _Mid_first    = _Result.begin();
+                auto _Mid_last     = _Result.end();
                 _Reverse_common(_Mid_first, _Mid_last);
 
                 if (_Mid_first == _Mid) {
diff --git a/stl/inc/any b/stl/inc/any
index 04e9fed839..326414d984 100644
--- a/stl/inc/any
+++ b/stl/inc/any
@@ -58,7 +58,7 @@ inline constexpr bool _Any_is_small = alignof(_Ty) <= alignof(max_align_t)
 enum class _Any_representation : uintptr_t { _Trivial, _Big, _Small };
 
 struct _Any_big_RTTI { // Hand-rolled vtable for types that must be heap allocated in an any
-    using _Destroy_fn = void __CLRCALL_PURE_OR_CDECL(void*) noexcept;
+    using _Destroy_fn = void __CLRCALL_PURE_OR_CDECL(void*) _NOEXCEPT_FNPTR;
     using _Copy_fn    = void* __CLRCALL_PURE_OR_CDECL(const void*);
 
     template <class _Ty>
@@ -76,9 +76,9 @@ struct _Any_big_RTTI { // Hand-rolled vtable for types that must be heap allocat
 };
 
 struct _Any_small_RTTI { // Hand-rolled vtable for nontrivial types that can be stored internally in an any
-    using _Destroy_fn = void __CLRCALL_PURE_OR_CDECL(void*) noexcept;
+    using _Destroy_fn = void __CLRCALL_PURE_OR_CDECL(void*) _NOEXCEPT_FNPTR;
     using _Copy_fn    = void __CLRCALL_PURE_OR_CDECL(void*, const void*);
-    using _Move_fn    = void __CLRCALL_PURE_OR_CDECL(void*, void*) noexcept;
+    using _Move_fn    = void __CLRCALL_PURE_OR_CDECL(void*, void*) _NOEXCEPT_FNPTR;
 
     template <class _Ty>
     static void __CLRCALL_PURE_OR_CDECL _Destroy_impl(void* const _Target) noexcept {
diff --git a/stl/inc/atomic b/stl/inc/atomic
index 633280f5d5..40a8edc9e9 100644
--- a/stl/inc/atomic
+++ b/stl/inc/atomic
@@ -20,6 +20,7 @@
 #if _HAS_CXX20
 #include <xatomic_wait.h>
 #endif // _HAS_CXX20
+#include <xthreads.h>
 
 #pragma pack(push, _CRT_PACKING)
 #pragma warning(push, _STL_WARNING_LEVEL)
@@ -48,32 +49,31 @@ _STL_DISABLE_CLANG_WARNINGS
 #endif // _DEBUG
 #endif // _INVALID_MEMORY_ORDER
 
-#if 0 // TRANSITION, ABI
 // MACRO _STD_COMPARE_EXCHANGE_128
-#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || defined(_M_ARM64)
+#ifdef _WIN64
+#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1
 #define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128
-#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || defined(_M_ARM64)
-#if defined(_M_X64) && !_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B
+#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv
 // 16-byte atomics are separately compiled for x64, as not all x64 hardware has the cmpxchg16b
 // instruction; in the event this instruction is not available, the fallback is a global
-// CRITICAL_SECTION shared by all 16-byte atomics.
+// synchronization object shared by all 16-byte atomics.
 // (Note: machines without this instruction typically have 2 cores or fewer, so this isn't too bad)
 // All pointer parameters must be 16-byte aligned.
-_NODISCARD extern "C" unsigned char __cdecl __std_atomic_compare_exchange_128(
+extern "C" _NODISCARD unsigned char __stdcall __std_atomic_compare_exchange_128(
     _Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow,
     _Inout_bytecount_(16) long long* _ComparandResult) noexcept;
-_NODISCARD extern "C" bool __cdecl __std_atomic_has_cmpxchg16b() noexcept;
+extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept;
 #define _STD_COMPARE_EXCHANGE_128 __std_atomic_compare_exchange_128
-#endif // defined(_M_X64) && !_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B
+#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1
+#endif // _WIN64
 
 // MACRO _ATOMIC_HAS_DCAS
 // Controls whether atomic::is_always_lock_free triggers for sizeof(void *) or 2 * sizeof(void *)
-#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || !defined(_M_X64)
+#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64)
 #define _ATOMIC_HAS_DCAS 1
-#else // ^^ We always have DCAS / We only sometimes have DCAS vvv
+#else // ^^^ We always have DCAS / We only sometimes have DCAS vvv
 #define _ATOMIC_HAS_DCAS 0
-#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || !defined(_M_X64)
-#endif // TRANSITION, ABI
+#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64)
 
 // MACRO _ATOMIC_CHOOSE_INTRINSIC
 #if defined(_M_IX86) || defined(_M_X64)
@@ -118,6 +118,9 @@ _NODISCARD extern "C" bool __cdecl __std_atomic_has_cmpxchg16b() noexcept;
 #define ATOMIC_LLONG_LOCK_FREE 2
 #define ATOMIC_POINTER_LOCK_FREE 2
 
+_EXTERN_C
+_Smtx_t* __stdcall __std_atomic_get_mutex(const void* _Key) noexcept;
+_END_EXTERN_C
 // Padding bits should not participate in cmpxchg comparison starting in C++20.
 // Clang does not have __builtin_zero_non_value_bits to exclude these bits to implement this C++20 feature.
 // The EDG front-end substitutes everything and runs into incomplete types passed to atomic.
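A minimal usage sketch (not part of the patch) of the std::atomic_ref facility that the atomic hunks below implement, assuming a C++20 <atomic>; the names (counter, workers) and the thread/iteration counts are illustrative only. It shows the alignment precondition that the new atomic_ref constructor checks, and an integral fetch_add, which in this patch routes through _Atomic_integral_facade<_Ty&>:

    #include <atomic>
    #include <thread>
    #include <vector>

    int main() {
        // A plain object that other code may also use non-atomically while no atomic_ref exists;
        // it must be at least as aligned as std::atomic_ref<int>::required_alignment.
        alignas(std::atomic_ref<int>::required_alignment) int counter = 0;

        std::vector<std::thread> workers;
        for (int i = 0; i < 4; ++i) {
            workers.emplace_back([&counter] {
                std::atomic_ref<int> ref{counter}; // refer to the existing int atomically
                for (int j = 0; j < 1000; ++j) {
                    ref.fetch_add(1, std::memory_order_relaxed);
                }
            });
        }

        for (auto& t : workers) {
            t.join();
        }

        return counter == 4000 ? 0 : 1; // every increment was atomic
    }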
@@ -341,6 +344,12 @@ struct _Atomic_storage_traits { // properties for how _Ty is stored in an atomic static constexpr bool _Uses_padding = _Padding_size != 0; }; +template +struct _Atomic_storage_traits<_Ty&> { // properties for how _Ty is stored in an atomic_ref + static constexpr size_t _Storage_size = sizeof(_Ty); + static constexpr bool _Uses_padding = false; +}; + // STRUCT TEMPLATE _Atomic_padded template ::_Uses_padding> struct _Atomic_padded { // aggregate to allow explicit constexpr zeroing of padding @@ -352,11 +361,29 @@ template struct _Atomic_padded<_Ty, false> { alignas(sizeof(_Ty)) mutable _Ty _Value; // align to sizeof(T); x86 stack aligns 8-byte objects on 4-byte boundaries }; + +template +struct _Atomic_padded<_Ty&, false> { + _Ty& _Value; +}; + #endif // TRANSITION, ABI +template +struct _Atomic_storage_types { + using _TStorage = _Atomic_padded<_Ty>; + using _Spinlock = long; +}; + +template +struct _Atomic_storage_types<_Ty&> { + using _TStorage = _Ty&; + using _Spinlock = _Smtx_t*; // POINTER TO mutex +}; + // STRUCT TEMPLATE _Atomic_storage #if 1 // TRANSITION, ABI -template +template )> #else // ^^^ don't break ABI / break ABI vvv template ::_Storage_size> #endif // TRANSITION, ABI @@ -371,8 +398,9 @@ void _Atomic_wait_direct( const _Value_type _Observed_bytes = _Atomic_reinterpret_as<_Value_type>(_This->load(_Order)); if (_Expected_bytes != _Observed_bytes) { #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + using _TVal = remove_reference_t<_Ty>; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const _Value_type _Mask_val = _Atomic_reinterpret_as<_Value_type>(_Mask._Ref()); if (((_Expected_bytes ^ _Observed_bytes) & _Mask_val) == 0) { @@ -391,13 +419,13 @@ void _Atomic_wait_direct( #endif // _HAS_CXX20 #if 1 // TRANSITION, ABI -inline void _Atomic_lock_spinlock(long& _Spinlock) noexcept { +inline void _Atomic_lock_acquire(long& _Spinlock) noexcept { while (_InterlockedExchange(&_Spinlock, 1)) { _YIELD_PROCESSOR(); } } -inline void _Atomic_unlock_spinlock(long& _Spinlock) noexcept { +inline void _Atomic_lock_release(long& _Spinlock) noexcept { #if defined(_M_ARM) || defined(_M_ARM64) _Memory_barrier(); __iso_volatile_store32(reinterpret_cast(&_Spinlock), 0); @@ -407,32 +435,56 @@ inline void _Atomic_unlock_spinlock(long& _Spinlock) noexcept { #endif // hardware } -class _Spinlock_guard { + +inline void _Atomic_lock_acquire(_Smtx_t* _Spinlock) noexcept { + _Smtx_lock_exclusive(_Spinlock); +} + +inline void _Atomic_lock_release(_Smtx_t* _Spinlock) noexcept { + _Smtx_unlock_exclusive(_Spinlock); +} + +template +class _Atomic_lock_guard { public: - explicit _Spinlock_guard(long& _Spinlock_) noexcept : _Spinlock(_Spinlock_) { - _Atomic_lock_spinlock(_Spinlock); + explicit _Atomic_lock_guard(_Spinlock_t& _Spinlock_) noexcept : _Spinlock(_Spinlock_) { + _Atomic_lock_acquire(_Spinlock); } - ~_Spinlock_guard() { - _Atomic_unlock_spinlock(_Spinlock); + ~_Atomic_lock_guard() { + _Atomic_lock_release(_Spinlock); } - _Spinlock_guard(const _Spinlock_guard&) = delete; - _Spinlock_guard& operator=(const _Spinlock_guard&) = delete; + _Atomic_lock_guard(const _Atomic_lock_guard&) = delete; + _Atomic_lock_guard& operator=(const _Atomic_lock_guard&) = delete; private: - long& _Spinlock; + _Spinlock_t& _Spinlock; }; #if _HAS_CXX20 -inline bool __stdcall _Atomic_wait_compare_non_lock_free( +template +bool __stdcall _Atomic_wait_compare_non_lock_free( const void* 
_Storage, void* _Comparand, size_t _Size, void* _Spinlock_raw) noexcept { - long& _Spinlock = *static_cast(_Spinlock_raw); - _Atomic_lock_spinlock(_Spinlock); + _Spinlock_t& _Spinlock = *static_cast<_Spinlock_t*>(_Spinlock_raw); + _Atomic_lock_acquire(_Spinlock); const auto _Cmp_result = _CSTD memcmp(_Storage, _Comparand, _Size); - _Atomic_unlock_spinlock(_Spinlock); + _Atomic_lock_release(_Spinlock); return _Cmp_result == 0; } + +#ifdef _WIN64 +inline bool __stdcall _Atomic_wait_compare_16_bytes(const void* _Storage, void* _Comparand, size_t, void*) noexcept { + const auto _Dest = static_cast(const_cast(_Storage)); + const auto _Cmp = static_cast(_Comparand); + alignas(16) long long _Tmp[2] = {_Cmp[0], _Cmp[1]}; +#ifdef _M_X64 + return _STD_COMPARE_EXCHANGE_128(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; +#else // ^^^ _M_X64 / ARM64 vvv + return _InterlockedCompareExchange128_nf(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; +#endif // ^^^ ARM64 ^^^ +} +#endif // _WIN64 #endif // _HAS_CXX20 #endif // TRANSITION, ABI @@ -441,40 +493,41 @@ struct _Atomic_storage { // Provides operations common to all specializations of std::atomic, load, store, exchange, and CAS. // Locking version used when hardware has no atomic operations for sizeof(_Ty). + using _TVal = remove_reference_t<_Ty>; + using _Guard = _Atomic_lock_guard::_Spinlock>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage(_Value) { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage(_Value) { // non-atomically initialize this atomic } - void store(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + void store(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // store with sequential consistency _Check_store_memory_order(_Order); - _Lock(); + _Guard _Lock{_Spinlock}; _Storage = _Value; - _Unlock(); } - _NODISCARD _Ty load(const memory_order _Order = memory_order_seq_cst) const noexcept { + _NODISCARD _TVal load(const memory_order _Order = memory_order_seq_cst) const noexcept { // load with sequential consistency _Check_load_memory_order(_Order); - _Lock(); - _Ty _Local(_Storage); - _Unlock(); + _Guard _Lock{_Spinlock}; + _TVal _Local(_Storage); return _Local; } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange _Value with _Storage with sequential consistency _Check_memory_order(_Order); - _Lock(); - _Ty _Result(_Storage); + _Guard _Lock{_Spinlock}; + _TVal _Result(_Storage); _Storage = _Value; - _Unlock(); return _Result; } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with sequential consistency, plain _Check_memory_order(_Order); const auto _Storage_ptr = _STD addressof(_Storage); @@ -483,50 +536,49 @@ struct _Atomic_storage { #if _CMPXCHG_MASK_OUT_PADDING_BITS __builtin_zero_non_value_bits(_Expected_ptr); #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _Lock(); + _Guard _Lock{_Spinlock}; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Local; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Local; const auto _Local_ptr = _Local._Ptr(); - _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty)); 
+ _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_TVal)); __builtin_zero_non_value_bits(_Local_ptr); - _Result = _CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0; + _Result = _CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_TVal)) == 0; } else { - _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0; + _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_TVal)) == 0; } #else // _CMPXCHG_MASK_OUT_PADDING_BITS - _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0; + _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_TVal)) == 0; #endif // _CMPXCHG_MASK_OUT_PADDING_BITS if (_Result) { - _CSTD memcpy(_Storage_ptr, _STD addressof(_Desired), sizeof(_Ty)); + _CSTD memcpy(_Storage_ptr, _STD addressof(_Desired), sizeof(_TVal)); } else { - _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty)); + _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_TVal)); } - _Unlock(); return _Result; } #if _HAS_CXX20 - void wait(_Ty _Expected, memory_order = memory_order_seq_cst) const noexcept { + void wait(_TVal _Expected, memory_order = memory_order_seq_cst) const noexcept { const auto _Storage_ptr = _STD addressof(_Storage); const auto _Expected_ptr = _STD addressof(_Expected); for (;;) { { - _Spinlock_guard _Lock{_Spinlock}; - if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) != 0) { + _Guard _Lock{_Spinlock}; + if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_TVal)) != 0) { // contents differed, we might be done, check for padding #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Local; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Local; const auto _Local_ptr = _Local._Ptr(); - _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty)); + _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_TVal)); __builtin_zero_non_value_bits(_Local_ptr); __builtin_zero_non_value_bits(_Expected_ptr); - if (_CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0) { + if (_CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_TVal)) == 0) { // _Storage differs from _Expected only by padding; copy the padding from _Storage into // _Expected - _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty)); + _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_TVal)); } else { // truly different, we're done return; @@ -539,8 +591,8 @@ struct _Atomic_storage { } } // unlock - __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_Ty), &_Spinlock, - &_Atomic_wait_compare_non_lock_free, _Atomic_wait_no_timeout); + __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_TVal), &_Spinlock, + &_Atomic_wait_compare_non_lock_free, _Atomic_wait_no_timeout); } } @@ -554,43 +606,37 @@ struct _Atomic_storage { #endif // _HAS_CXX20 #if 1 // TRANSITION, ABI - void _Lock() const noexcept { // lock the spinlock - _Atomic_lock_spinlock(_Spinlock); - } - - void _Unlock() const noexcept { // unlock the spinlock - _Atomic_unlock_spinlock(_Spinlock); +protected: + void _Init_spinlock_for_ref() noexcept { + _Spinlock = __std_atomic_get_mutex(_STD addressof(_Storage)); } private: - mutable long _Spinlock = 0; + // Spinlock integer for non-lock-free atomic. 
mutex pointer for non-lock-free atomic_ref + mutable typename _Atomic_storage_types<_Ty>::_Spinlock _Spinlock{}; public: _Ty _Storage{}; #else // ^^^ don't break ABI / break ABI vvv - void _Lock() const noexcept { // lock the spinlock - _Smtx_lock_exclusive(&_Spinlock); - } - - void _Unlock() const noexcept { // unlock the spinlock - _Smtx_unlock_exclusive(&_Spinlock); - } - _Ty _Storage; - mutable _Smtx_t _Spinlock = 0; + mutable _Smtx_t _Mutex{}; #endif // TRANSITION, ABI }; template struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); #if defined(_M_ARM) || defined(_M_ARM64) @@ -602,7 +648,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics #endif // hardware } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -625,36 +671,36 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); char _As_bytes = __iso_volatile_load8(_Mem); _Compiler_or_memory_barrier(); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); char _As_bytes = __iso_volatile_load8(_Mem); _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order char _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange8, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order char _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation char _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const char _Mask_val = 
_Atomic_reinterpret_as(_Mask._Ref()); for (;;) { @@ -683,7 +729,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -696,18 +742,22 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; template struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); #if defined(_M_ARM) || defined(_M_ARM64) @@ -719,7 +769,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics #endif // hardware } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -742,35 +792,35 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); short _As_bytes = __iso_volatile_load16(_Mem); _Compiler_or_memory_barrier(); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); short _As_bytes = __iso_volatile_load16(_Mem); _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order short _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order short _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation short _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS if constexpr (_Might_have_non_value_bits<_Ty>) 
{ - _Storage_for<_Ty> _Mask{_Form_mask}; + _Storage_for<_TVal> _Mask{_Form_mask}; const short _Mask_val = _Atomic_reinterpret_as(_Mask._Ref()); for (;;) { @@ -781,7 +831,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) { - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val); @@ -799,7 +849,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -812,18 +862,22 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; template struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency #if defined(_M_ARM) || defined(_M_ARM64) _Memory_barrier(); __iso_volatile_store32(_Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); @@ -833,7 +887,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics #endif // hardware } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const int _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -856,35 +910,35 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); auto _As_bytes = __iso_volatile_load32(_Mem); _Compiler_or_memory_barrier(); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); auto _As_bytes = __iso_volatile_load32(_Mem); _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return 
reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order long _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation long _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const long _Mask_val = _Atomic_reinterpret_as(_Mask); for (;;) { @@ -895,7 +949,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) { - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val); @@ -908,12 +962,12 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics return true; } - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -926,18 +980,22 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; template struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); #if defined(_M_IX86) @@ -953,7 +1011,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics #endif // _M_ARM64 } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -976,7 +1034,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); long long _As_bytes; #ifdef _M_ARM @@ -986,10 +1044,10 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics _As_bytes = __iso_volatile_load64(_Mem); _Compiler_or_memory_barrier(); #endif - return reinterpret_cast<_Ty&>(_As_bytes); + return 
reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); #ifdef _M_ARM long long _As_bytes = __ldrexd(_Mem); @@ -997,36 +1055,36 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics long long _As_bytes = __iso_volatile_load64(_Mem); #endif _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } #if defined(_M_IX86) && defined(__clang__) // TRANSITION, LLVM-46595 - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with (effectively) sequential consistency - _Ty _Temp{load()}; + _TVal _Temp{load()}; while (!compare_exchange_strong(_Temp, _Value, _Order)) { // keep trying } return _Temp; } #else // ^^^ defined(_M_IX86) && defined(__clang__), LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange64, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } #endif // ^^^ !defined(_M_IX86) || !defined(__clang__) ^^^ - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order long long _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation long long _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const long long _Mask_val = _Atomic_reinterpret_as(_Mask); for (;;) { @@ -1038,7 +1096,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) { - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val); @@ -1051,12 +1109,12 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return true; } - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -1069,35 +1127,37 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; -#if 0 // TRANSITION, ABI -#if defined(_M_X64) || defined(_M_ARM64) 
+#ifdef _WIN64 template -struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics +struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics + // TRANSITION, ABI: replace '_Ty&' with '_Ty' in this specialization + using _TVal = remove_reference_t<_Ty&>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty&, const _TVal> _Value) noexcept : _Storage{_Value} {} // non-atomically initialize this atomic - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency (void) exchange(_Value); } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order _Check_store_memory_order(_Order); (void) exchange(_Value, _Order); } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency long long* const _Storage_ptr = const_cast(_Atomic_address_as(_Storage)); _Int128 _Result{}; // atomic CAS 0 with 0 (void) _STD_COMPARE_EXCHANGE_128(_Storage_ptr, 0, 0, &_Result._Low); - return reinterpret_cast<_Ty&>(_Result); + return reinterpret_cast<_TVal&>(_Result); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order #ifdef _M_ARM64 long long* const _Storage_ptr = const_cast(_Atomic_address_as(_Storage)); _Int128 _Result{}; // atomic CAS 0 with 0 @@ -1119,36 +1179,71 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics break; } - return reinterpret_cast<_Ty&>(_Result); + return reinterpret_cast<_TVal&>(_Result); #else // ^^^ _M_ARM64 / _M_X64 vvv _Check_load_memory_order(_Order); return load(); #endif // _M_ARM64 } - _Ty exchange(const _Ty _Value) noexcept { // exchange with sequential consistency - _Ty _Result{_Value}; + _TVal exchange(const _TVal _Value) noexcept { // exchange with sequential consistency + _TVal _Result{_Value}; while (!compare_exchange_strong(_Result, _Value)) { // keep trying } return _Result; } - _Ty exchange(const _Ty _Value, const memory_order _Order) noexcept { // exchange with given memory order - _Ty _Result{_Value}; + _TVal exchange(const _TVal _Value, const memory_order _Order) noexcept { // exchange with given memory order + _TVal _Result{_Value}; while (!compare_exchange_strong(_Result, _Value, _Order)) { // keep trying } return _Result; } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order _Int128 _Desired_bytes{}; - _CSTD memcpy(&_Desired_bytes, _STD addressof(_Desired), sizeof(_Ty)); + _CSTD memcpy(&_Desired_bytes, _STD addressof(_Desired), sizeof(_TVal)); _Int128 _Expected_temp{}; - _CSTD memcpy(&_Expected_temp, _STD addressof(_Expected), sizeof(_Ty)); + _CSTD memcpy(&_Expected_temp, _STD addressof(_Expected), sizeof(_TVal)); unsigned char _Result; +#if _CMPXCHG_MASK_OUT_PADDING_BITS + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Int128 _Expected_originally{}; + _CSTD memcpy(&_Expected_originally, _STD addressof(_Expected), sizeof(_TVal)); + + _Storage_for<_TVal> 
_Mask{_Form_mask}; + _Int128 _Mask_val{}; + _CSTD memcpy(&_Mask_val, _Mask._Ptr(), sizeof(_TVal)); + for (;;) { +#ifdef _M_ARM64 + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedCompareExchange128, + _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, + &_Expected_temp._Low); +#else // ^^^ _M_ARM64 / _M_X64 vvv + (void) _Order; + _Result = _STD_COMPARE_EXCHANGE_128(&reinterpret_cast(_Storage), _Desired_bytes._High, + _Desired_bytes._Low, &_Expected_temp._Low); +#endif // _M_ARM64 + if (_Result) { + return true; + } + + if (((_Expected_temp._Low ^ _Expected_originally._Low) & _Mask_val._Low) != 0 + || ((_Expected_temp._High ^ _Expected_originally._High) & _Mask_val._High) != 0) { + _CSTD memcpy(_STD addressof(_Expected), &_Expected_temp, sizeof(_TVal)); + return false; + } + + _Expected_temp._Low = + (_Expected_originally._Low & _Mask_val._Low) | (_Expected_temp._Low & ~_Mask_val._Low); + _Expected_temp._High = + (_Expected_originally._High & _Mask_val._High) | (_Expected_temp._High & ~_Mask_val._High); + } + } +#endif // _CMPXCHG_MASK_OUT_PADDING_BITS #ifdef _M_ARM64 _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedCompareExchange128, _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); @@ -1158,13 +1253,43 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics &reinterpret_cast(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #endif // _M_ARM64 if (_Result == 0) { - _CSTD memcpy(_STD addressof(_Expected), &_Expected_temp, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Expected_temp, sizeof(_TVal)); } return _Result != 0; } #if _HAS_CXX20 + void wait(_TVal _Expected, memory_order _Order = memory_order_seq_cst) const noexcept { + const auto _Storage_ptr = _STD addressof(_Storage); + const auto _Expected_ptr = _STD addressof(_Expected); + _Int128 _Expected_bytes = reinterpret_cast(_Expected); + + for (;;) { + const _TVal _Observed = load(_Order); + _Int128 _Observed_bytes = reinterpret_cast(_Observed); + if (_Observed_bytes._Low != _Expected_bytes._Low || _Observed_bytes._High != _Expected_bytes._High) { +#if _CMPXCHG_MASK_OUT_PADDING_BITS + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; + const _Int128 _Mask_val = reinterpret_cast(_Mask._Ref()); + + if (((_Expected_bytes._Low ^ _Observed_bytes._Low) & _Mask_val._Low) == 0 + && ((_Expected_bytes._High ^ _Observed_bytes._High) & _Mask_val._High) == 0) { + _Expected_bytes = _Observed_bytes; + continue; + } + } +#endif // _CMPXCHG_MASK_OUT_PADDING_BITS + + return; + } + + __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_TVal), nullptr, + &_Atomic_wait_compare_16_bytes, _Atomic_wait_no_timeout); + } + } + void notify_one() noexcept { __std_atomic_notify_one_indirect(_STD addressof(_Storage)); } @@ -1179,10 +1304,9 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics long long _High; }; - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty&>::_TStorage _Storage; }; -#endif // defined(_M_X64) || defined(_M_ARM64) -#endif // TRANSITION, ABI +#endif // _WIN64 // STRUCT TEMPLATE _Atomic_integral template @@ -1191,302 +1315,310 @@ struct _Atomic_integral; // not defined template struct _Atomic_integral<_Ty, 1> : _Atomic_storage<_Ty> { // atomic integral operations using 1-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() 
= default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedAnd8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedOr8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedXor8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { - return static_cast<_Ty>(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), 1)); + _TVal operator++(int) noexcept { + return static_cast<_TVal>(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), 1)); } - _Ty operator++() noexcept { + _TVal operator++() noexcept { unsigned char _Before = static_cast(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), 1)); ++_Before; - return static_cast<_Ty>(_Before); + return static_cast<_TVal>(_Before); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { return static_cast<_Ty>(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), -1)); } - _Ty operator--() noexcept { + _TVal operator--() noexcept { unsigned char _Before = static_cast(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), -1)); --_Before; - return static_cast<_Ty>(_Before); + return static_cast<_TVal>(_Before); } }; template struct _Atomic_integral<_Ty, 2> : _Atomic_storage<_Ty> { // atomic integral operations using 2-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() = default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal 
fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedAnd16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedOr16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedXor16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { + _TVal operator++(int) noexcept { unsigned short _After = static_cast(_InterlockedIncrement16(_Atomic_address_as(this->_Storage))); --_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator++() noexcept { - return static_cast<_Ty>(_InterlockedIncrement16(_Atomic_address_as(this->_Storage))); + _TVal operator++() noexcept { + return static_cast<_TVal>(_InterlockedIncrement16(_Atomic_address_as(this->_Storage))); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { unsigned short _After = static_cast(_InterlockedDecrement16(_Atomic_address_as(this->_Storage))); ++_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator--() noexcept { - return static_cast<_Ty>(_InterlockedDecrement16(_Atomic_address_as(this->_Storage))); + _TVal operator--() noexcept { + return static_cast<_TVal>(_InterlockedDecrement16(_Atomic_address_as(this->_Storage))); } }; template struct _Atomic_integral<_Ty, 4> : _Atomic_storage<_Ty> { // atomic integral operations using 4-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() = default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const 
memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedAnd, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedOr, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedXor, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { + _TVal operator++(int) noexcept { unsigned long _After = static_cast(_InterlockedIncrement(_Atomic_address_as(this->_Storage))); --_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator++() noexcept { - return static_cast<_Ty>(_InterlockedIncrement(_Atomic_address_as(this->_Storage))); + _TVal operator++() noexcept { + return static_cast<_TVal>(_InterlockedIncrement(_Atomic_address_as(this->_Storage))); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { unsigned long _After = static_cast(_InterlockedDecrement(_Atomic_address_as(this->_Storage))); ++_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator--() noexcept { - return static_cast<_Ty>(_InterlockedDecrement(_Atomic_address_as(this->_Storage))); + _TVal operator--() noexcept { + return static_cast<_TVal>(_InterlockedDecrement(_Atomic_address_as(this->_Storage))); } }; template struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral operations using 8-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() = default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ #if defined(_M_IX86) && defined(__clang__) // TRANSITION, LLVM-46595 - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp + _Operand, _Order)) { // keep trying } return _Temp; } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp & _Operand, _Order)) 
{ // keep trying } return _Temp; } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp | _Operand, _Order)) { // keep trying } return _Temp; } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp ^ _Operand, _Order)) { // keep trying } return _Temp; } - _Ty operator++(int) noexcept { - return fetch_add(static_cast<_Ty>(1)); + _TVal operator++(int) noexcept { + return fetch_add(static_cast<_TVal>(1)); } - _Ty operator++() noexcept { - return fetch_add(static_cast<_Ty>(1)) + static_cast<_Ty>(1); + _TVal operator++() noexcept { + return fetch_add(static_cast<_TVal>(1)) + static_cast<_TVal>(1); } - _Ty operator--(int) noexcept { - return fetch_add(static_cast<_Ty>(-1)); + _TVal operator--(int) noexcept { + return fetch_add(static_cast<_TVal>(-1)); } - _Ty operator--() noexcept { - return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); + _TVal operator--() noexcept { + return fetch_add(static_cast<_TVal>(-1)) - static_cast<_TVal>(1); } #else // ^^^ defined(_M_IX86) && defined(__clang__), LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedAnd64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedOr64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedXor64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { + _TVal operator++(int) noexcept { unsigned long long _After = static_cast(_InterlockedIncrement64(_Atomic_address_as(this->_Storage))); --_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); 
} - _Ty operator++() noexcept { - return static_cast<_Ty>(_InterlockedIncrement64(_Atomic_address_as(this->_Storage))); + _TVal operator++() noexcept { + return static_cast<_TVal>(_InterlockedIncrement64(_Atomic_address_as(this->_Storage))); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { unsigned long long _After = static_cast(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); ++_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator--() noexcept { - return static_cast<_Ty>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); + _TVal operator--() noexcept { + return static_cast<_TVal>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); } #endif // ^^^ !defined(_M_IX86) || !defined(__clang__) ^^^ }; @@ -1644,6 +1776,85 @@ struct _Atomic_integral_facade : _Atomic_integral<_Ty> { } }; +// STRUCT TEMPLATE _Atomic_integral_facade +template +struct _Atomic_integral_facade<_Ty&> : _Atomic_integral<_Ty&> { + // provides operator overloads and other support for atomic integral specializations + using _Base = _Atomic_integral<_Ty&>; + using difference_type = _Ty; + +#ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 + _Atomic_integral_facade() = default; + /* implicit */ constexpr _Atomic_integral_facade(_Ty& _Value) noexcept : _Base(_Value) {} +#else // ^^^ workaround / no workaround vvv + using _Base::_Base; +#endif // ^^^ no workaround ^^^ + + _NODISCARD static _Ty _Negate(const _Ty _Value) noexcept { // returns two's complement negated value of _Value + return static_cast<_Ty>(0U - static_cast>(_Value)); + } + + _Ty fetch_add(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_add(_Operand); + } + + _Ty fetch_add(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_add(_Operand, _Order); + } + + _Ty fetch_sub(const _Ty _Operand) const noexcept { + return fetch_add(_Negate(_Operand)); + } + + _Ty fetch_sub(const _Ty _Operand, const memory_order _Order) const noexcept { + return fetch_add(_Negate(_Operand), _Order); + } + + _Ty operator+=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_add(_Operand) + _Operand); + } + + _Ty operator-=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_sub(_Operand) - _Operand); + } + + _Ty fetch_and(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_and(_Operand); + } + + _Ty fetch_and(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_and(_Operand, _Order); + } + + _Ty fetch_or(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_or(_Operand); + } + + _Ty fetch_or(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_or(_Operand, _Order); + } + + _Ty fetch_xor(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_xor(_Operand); + } + + _Ty fetch_xor(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_xor(_Operand, _Order); + } + + _Ty operator&=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_and(_Operand) & _Operand); + } + + _Ty operator|=(const _Ty _Operand) const noexcept { + return 
static_cast<_Ty>(fetch_or(_Operand) | _Operand); + } + + _Ty operator^=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_xor(_Operand) ^ _Operand); + } +}; + #if _HAS_CXX20 template struct _Atomic_floating : _Atomic_storage<_Ty> { @@ -1703,6 +1914,46 @@ struct _Atomic_floating : _Atomic_storage<_Ty> { return const_cast<_Atomic_floating*>(this)->fetch_sub(_Operand) - _Operand; } }; + +template +struct _Atomic_floating<_Ty&> : _Atomic_storage<_Ty&> { + // provides atomic floating-point operations + using _Base = _Atomic_storage<_Ty&>; + using difference_type = _Ty; + +#ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 + _Atomic_floating() = default; + /* implicit */ constexpr _Atomic_floating(_Ty& _Value) noexcept : _Base(_Value) {} +#else // ^^^ workaround / no workaround vvv + using _Base::_Base; +#endif // ^^^ no workaround ^^^ + + _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Ty _Temp{this->load(memory_order_relaxed)}; + while (!const_cast<_Atomic_floating*>(this)->_Base::compare_exchange_strong( + _Temp, _Temp + _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty fetch_sub(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Ty _Temp{this->load(memory_order_relaxed)}; + while (!const_cast<_Atomic_floating*>(this)->_Base::compare_exchange_strong( + _Temp, _Temp - _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty operator+=(const _Ty _Operand) const noexcept { + return fetch_add(_Operand) + _Operand; + } + + _Ty operator-=(const _Ty _Operand) const noexcept { + return fetch_sub(_Operand) - _Operand; + } +}; #endif // _HAS_CXX20 // STRUCT TEMPLATE _Atomic_pointer @@ -1807,23 +2058,85 @@ struct _Atomic_pointer : _Atomic_storage<_Ty> { } }; + +// STRUCT TEMPLATE _Atomic_pointer +template +struct _Atomic_pointer<_Ty&> : _Atomic_storage<_Ty&> { + using _Base = _Atomic_storage<_Ty&>; + using difference_type = ptrdiff_t; + +#ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 + _Atomic_pointer() = default; + /* implicit */ constexpr _Atomic_pointer(_Ty& _Value) noexcept : _Base(_Value) {} +#else // ^^^ workaround / no workaround vvv + using _Base::_Base; +#endif // ^^^ no workaround ^^^ + + _Ty fetch_add(const ptrdiff_t _Diff, const memory_order _Order = memory_order_seq_cst) const noexcept { + const ptrdiff_t _Shift_bytes = + static_cast(static_cast(_Diff) * sizeof(remove_pointer_t<_Ty>)); + ptrdiff_t _Result; +#if defined(_M_IX86) || defined(_M_ARM) + _ATOMIC_CHOOSE_INTRINSIC( + _Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), _Shift_bytes); +#else // ^^^ 32 bits / 64 bits vvv + _ATOMIC_CHOOSE_INTRINSIC( + _Order, _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), _Shift_bytes); +#endif // hardware + return reinterpret_cast<_Ty>(_Result); + } + + _Ty fetch_sub(const ptrdiff_t _Diff) const noexcept { + return fetch_add(static_cast(0 - static_cast(_Diff))); + } + + _Ty fetch_sub(const ptrdiff_t _Diff, const memory_order _Order) const noexcept { + return fetch_add(static_cast(0 - static_cast(_Diff)), _Order); + } + + _Ty operator++(int) const noexcept { + return fetch_add(1); + } + + _Ty operator++() const noexcept { + return fetch_add(1) + 1; + } + + _Ty operator--(int) const noexcept { + return fetch_add(-1); + } + + _Ty operator--() const noexcept { + return fetch_add(-1) - 1; + } + + _Ty operator+=(const ptrdiff_t _Diff) const noexcept { + return fetch_add(_Diff) + _Diff; + } + + _Ty 
operator-=(const ptrdiff_t _Diff) const noexcept { + return fetch_add(static_cast(0 - static_cast(_Diff))) - _Diff; + } +}; + + // STRUCT TEMPLATE atomic #define ATOMIC_VAR_INIT(_Value) \ { _Value } -template +template using _Choose_atomic_base2_t = - typename _Select && !is_same_v>::template _Apply<_Atomic_integral_facade<_Ty>, - typename _Select && is_object_v>>::template _Apply<_Atomic_pointer<_Ty>, - _Atomic_storage<_Ty>>>; + typename _Select && !is_same_v>::template _Apply<_Atomic_integral_facade<_Ty>, + typename _Select && is_object_v>>::template _Apply< + _Atomic_pointer<_Ty>, _Atomic_storage<_Ty>>>; #if _HAS_CXX20 -template -using _Choose_atomic_base_t = - typename _Select>::template _Apply<_Atomic_floating<_Ty>, _Choose_atomic_base2_t<_Ty>>; +template +using _Choose_atomic_base_t = typename _Select>::template _Apply<_Atomic_floating<_Ty>, + _Choose_atomic_base2_t<_TVal, _Ty>>; #else // ^^^ _HAS_CXX20 // !_HAS_CXX20 vvv -template -using _Choose_atomic_base_t = _Choose_atomic_base2_t<_Ty>; +template +using _Choose_atomic_base_t = _Choose_atomic_base2_t<_TVal, _Ty>; #endif //_HAS_CXX20 template @@ -2010,6 +2323,121 @@ template atomic(_Ty) -> atomic<_Ty>; #endif // _HAS_CXX17 +#if _HAS_CXX20 +template +struct atomic_ref : _Choose_atomic_base_t<_Ty, _Ty&> { // atomic reference +private: + using _Base = _Choose_atomic_base_t<_Ty, _Ty&>; + +public: + // clang-format off + static_assert(is_trivially_copyable_v<_Ty> && is_copy_constructible_v<_Ty> && is_move_constructible_v<_Ty> + && is_copy_assignable_v<_Ty> && is_move_assignable_v<_Ty>, + "atomic_ref requires T to be trivially copyable, copy constructible, move constructible, copy assignable, " + "and move assignable."); + // clang-format on + + using value_type = _Ty; + + explicit atomic_ref(_Ty& _Value) noexcept /* strengthened */ : _Base(_Value) { + if constexpr (_Is_potentially_lock_free) { + _Check_alignment(_Value); + } else { + this->_Init_spinlock_for_ref(); + } + } + + atomic_ref(const atomic_ref&) noexcept = default; + + atomic_ref& operator=(const atomic_ref&) = delete; + + static constexpr bool is_always_lock_free = _Is_always_lock_free; + + static constexpr bool _Is_potentially_lock_free = + sizeof(_Ty) <= 2 * sizeof(void*) && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; + + static constexpr size_t required_alignment = _Is_potentially_lock_free ? 
sizeof(_Ty) : alignof(_Ty); + + _NODISCARD bool is_lock_free() const noexcept { +#if _ATOMIC_HAS_DCAS + return is_always_lock_free; +#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv + if constexpr (is_always_lock_free) { + return true; + } else { + return __std_atomic_has_cmpxchg16b() != 0; + } +#endif // _ATOMIC_HAS_DCAS + } + + void store(const _Ty _Value) const noexcept { + const_cast(this)->_Base::store(_Value); + } + + void store(const _Ty _Value, const memory_order _Order) const noexcept { + const_cast(this)->_Base::store(_Value, _Order); + } + + _Ty operator=(const _Ty _Value) const noexcept { + store(_Value); + return _Value; + } + + _Ty exchange(const _Ty _Value) const noexcept { + return const_cast(this)->_Base::exchange(_Value); + } + + _Ty exchange(const _Ty _Value, const memory_order _Order) const noexcept { + return const_cast(this)->_Base::exchange(_Value, _Order); + } + + bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired) const noexcept { + return const_cast(this)->_Base::compare_exchange_strong(_Expected, _Desired); + } + + bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, const memory_order _Order) const noexcept { + return const_cast(this)->_Base::compare_exchange_strong(_Expected, _Desired, _Order); + } + + bool compare_exchange_strong( + _Ty& _Expected, const _Ty _Desired, const memory_order _Success, const memory_order _Failure) const noexcept { + return compare_exchange_strong(_Expected, _Desired, _Combine_cas_memory_orders(_Success, _Failure)); + } + + bool compare_exchange_weak(_Ty& _Expected, const _Ty _Desired) const noexcept { + return compare_exchange_strong(_Expected, _Desired); + } + + bool compare_exchange_weak(_Ty& _Expected, const _Ty _Desired, const memory_order _Order) const noexcept { + return compare_exchange_strong(_Expected, _Desired, _Order); + } + + bool compare_exchange_weak( + _Ty& _Expected, const _Ty _Desired, const memory_order _Success, const memory_order _Failure) const noexcept { + return compare_exchange_strong(_Expected, _Desired, _Combine_cas_memory_orders(_Success, _Failure)); + } + + operator _Ty() const noexcept { + return this->load(); + } + + void notify_one() const noexcept { + const_cast(this)->_Base::notify_one(); + } + + void notify_all() const noexcept { + const_cast(this)->_Base::notify_all(); + } + +private: + static void _Check_alignment([[maybe_unused]] const _Ty& _Value) { + _ATOMIC_REF_CHECK_ALIGNMENT( + (reinterpret_cast(_STD addressof(_Value)) & (required_alignment - 1)) == 0, + "atomic_ref underlying object is not aligned as required_alignment"); + } +}; +#endif // _HAS_CXX20 + // NONMEMBER OPERATIONS ON ATOMIC TYPES template _NODISCARD bool atomic_is_lock_free(const volatile atomic<_Ty>* _Mem) noexcept { diff --git a/stl/inc/charconv b/stl/inc/charconv index 905d5b00e5..f222d291e8 100644 --- a/stl/inc/charconv +++ b/stl/inc/charconv @@ -1327,9 +1327,11 @@ _NODISCARD errc _Assemble_floating_point_value(const uint64_t _Initial_mantissa, // We detect this case here and re-adjust the mantissa and exponent appropriately, to form a normal number: if (_Mantissa > _Traits::_Denormal_mantissa_mask) { - // We add one to the _Denormal_mantissa_shift to account for the hidden mantissa bit - // (we subtracted one to account for this bit when we computed the _Denormal_mantissa_shift above). - _Exponent = _Initial_exponent - (_Denormal_mantissa_shift + 1) - _Normal_mantissa_shift; + // The mantissa is already in the correct position for a normal value. 
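
A concrete illustration of the carry this comment describes: for IEEE float, the largest subnormal has all 23 stored mantissa bits set, and rounding it up by one ulp carries into bit 23, exactly the position that becomes the hidden bit of the smallest normal value, so only the shift already applied needs to be reflected in the exponent. A quick check, assuming C++20 std::bit_cast (not part of this change):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
    // Largest subnormal float: exponent field 0, all 23 stored mantissa bits set.
    const float largest_subnormal = std::bit_cast<float>(std::uint32_t{0x007F'FFFF});
    // One ulp more carries out of the stored mantissa into what becomes the
    // hidden bit: exponent field 1, stored mantissa 0, i.e. the smallest normal.
    const float smallest_normal = std::bit_cast<float>(std::uint32_t{0x0080'0000});
    assert(smallest_normal > largest_subnormal);
}
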
(The carried over bit when we + // added 1 to round the mantissa is in the correct position for the hidden bit.) + // _Denormal_mantissa_shift is the actual number of bits by which we have shifted the mantissa into its + // final position. + _Exponent = _Initial_exponent - _Denormal_mantissa_shift; } } else { _Mantissa <<= _Denormal_mantissa_shift; diff --git a/stl/inc/iterator b/stl/inc/iterator index fee3511a90..c2927f0d72 100644 --- a/stl/inc/iterator +++ b/stl/inc/iterator @@ -511,6 +511,311 @@ private: bool _Failed; // true if any stores have failed streambuf_type* _Strbuf; // the wrapped stream buffer }; + +#ifdef __cpp_lib_concepts +// CLASS TEMPLATE counted_iterator +template +class counted_iterator { +public: + using iterator_type = _Iter; + + // [counted.iter.const] + constexpr counted_iterator() = default; + constexpr counted_iterator(_Iter _Right, const iter_difference_t<_Iter> _Diff) noexcept( + is_nothrow_move_constructible_v<_Iter>) // strengthened + : _Current(_STD move(_Right)), _Length(_Diff) { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Diff >= 0, "counted_iterator requires non-negative length n"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + } + + // clang-format off + template + requires convertible_to + constexpr counted_iterator(const counted_iterator<_Other>& _Right) noexcept( + is_nothrow_constructible_v<_Iter, const _Other&>) // strengthened + : _Current(_Right._Current), _Length(_Right._Length) {} + + template + requires assignable_from<_Iter&, const _Other&> + constexpr counted_iterator& operator=(const counted_iterator<_Other>& _Right) noexcept( + is_nothrow_assignable_v<_Iter&, const _Other&>) /* strengthened */ { + // clang-format on + _Current = _Right._Current; + _Length = _Right._Length; + return *this; + } + + // [counted.iter.access] + _NODISCARD constexpr _Iter base() const& noexcept(is_nothrow_copy_constructible_v<_Iter>) /* strengthened */ + requires copy_constructible<_Iter> { + return _Current; + } + + _NODISCARD constexpr _Iter base() && noexcept(is_nothrow_move_constructible_v<_Iter>) /* strengthened */ { + return _STD move(_Current); + } + + _NODISCARD constexpr iter_difference_t<_Iter> count() const noexcept { + return _Length; + } + + // [counted.iter.elem] + _NODISCARD constexpr decltype(auto) operator*() noexcept(noexcept(*_Current)) /* strengthened */ { +#if _ITERATOR_DEBUG_LEVEL != 0 + // Per proposed resolution of LWG-3472 + _STL_VERIFY(_Length > 0, "counted_iterator dereference beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + return *_Current; + } + + _NODISCARD constexpr decltype(auto) operator*() const noexcept(noexcept(*_Current)) /* strengthened */ + requires _Dereferenceable { +#if _ITERATOR_DEBUG_LEVEL != 0 + // Per proposed resolution of LWG-3472 + _STL_VERIFY(_Length > 0, "counted_iterator dereference beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + return *_Current; + } + + _NODISCARD constexpr decltype(auto) operator[](const iter_difference_t<_Iter> _Diff) const + requires random_access_iterator<_Iter> { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Diff < _Length, "counted_iterator index out of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + return _Current[_Diff]; + } + + // [counted.iter.nav] + constexpr counted_iterator& operator++() { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Length > 0, "counted_iterator increment beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + ++_Current; + --_Length; + return *this; + } + + constexpr decltype(auto) operator++(int) { +#if 
_ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Length > 0, "counted_iterator increment beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + --_Length; + _TRY_BEGIN + return _Current++; + + _CATCH_ALL + ++_Length; + + _RERAISE; + _CATCH_END + } + + constexpr counted_iterator operator++(int) requires forward_iterator<_Iter> { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Length > 0, "counted_iterator increment beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + counted_iterator _Tmp = *this; + ++_Current; + --_Length; + return _Tmp; + } + + constexpr counted_iterator& operator--() requires bidirectional_iterator<_Iter> { + --_Current; + ++_Length; + return *this; + } + + constexpr counted_iterator operator--(int) requires bidirectional_iterator<_Iter> { + counted_iterator _Tmp = *this; + --_Current; + ++_Length; + return _Tmp; + } + + _NODISCARD constexpr counted_iterator operator+(const iter_difference_t<_Iter> _Diff) const + requires random_access_iterator<_Iter> { + return counted_iterator{_Current + _Diff, static_cast>(_Length - _Diff)}; + } + + _NODISCARD friend constexpr counted_iterator operator+( + const iter_difference_t<_Iter> _Diff, const counted_iterator& _Right) requires random_access_iterator<_Iter> { + return counted_iterator{_Right._Current + _Diff, static_cast>(_Right._Length - _Diff)}; + } + + constexpr counted_iterator& operator+=( + const iter_difference_t<_Iter> _Diff) requires random_access_iterator<_Iter> { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Diff <= _Length, "counted_iterator seek beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + _Current += _Diff; + _Length -= _Diff; + return *this; + } + + _NODISCARD constexpr counted_iterator operator-(const iter_difference_t<_Iter> _Diff) const + requires random_access_iterator<_Iter> { + return counted_iterator{_Current - _Diff, static_cast>(_Length + _Diff)}; + } + + template _Other> + _NODISCARD friend constexpr iter_difference_t<_Other> operator-( + const counted_iterator& _Left, const counted_iterator<_Other>& _Right) noexcept /* strengthened */ { +#if _ITERATOR_DEBUG_LEVEL != 0 + _Same_sequence(_Left, _Right); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + return _Right.count() - _Left._Length; + } + + _NODISCARD friend constexpr iter_difference_t<_Iter> operator-( + const counted_iterator& _Left, default_sentinel_t) noexcept /* strengthened */ { + return -_Left._Length; + } + + _NODISCARD friend constexpr iter_difference_t<_Iter> operator-( + default_sentinel_t, const counted_iterator& _Right) noexcept /* strengthened */ { + return _Right._Length; + } + + constexpr counted_iterator& operator-=( + const iter_difference_t<_Iter> _Diff) requires random_access_iterator<_Iter> { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(-_Diff <= _Length, "counted_iterator decrement beyond end of range"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + _Current -= _Diff; + _Length += _Diff; + return *this; + } + + // [counted.iter.cmp] + template _Other> + _NODISCARD friend constexpr bool operator==( + const counted_iterator& _Left, const counted_iterator<_Other>& _Right) noexcept /* strengthened */ { +#if _ITERATOR_DEBUG_LEVEL != 0 + _Same_sequence(_Left, _Right); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + return _Left._Length == _Right.count(); + } + + _NODISCARD friend constexpr bool operator==(const counted_iterator& _Left, default_sentinel_t) noexcept + /* strengthened */ { + return _Left._Length == 0; + } + + template _Other> + _NODISCARD friend constexpr strong_ordering operator<=>( + const counted_iterator& 
_Left, const counted_iterator<_Other>& _Right) noexcept /* strengthened */ { +#if _ITERATOR_DEBUG_LEVEL != 0 + _Same_sequence(_Left, _Right); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + return _Right.count() <=> _Left._Length; + } + + // [counted.iter.cust] + _NODISCARD friend constexpr iter_rvalue_reference_t<_Iter> iter_move(const counted_iterator& _Right) noexcept( + noexcept(_RANGES iter_move(_Right._Current))) requires input_iterator<_Iter> { + return _RANGES iter_move(_Right._Current); + } + + template _Other> + friend constexpr void iter_swap(const counted_iterator& _Left, const counted_iterator<_Other>& _Right) noexcept( + noexcept(_RANGES iter_swap(_Left._Current, _Right._Current))) { + _RANGES iter_swap(_Left._Current, _Right._Current); + } + + template _Other> + friend constexpr void _Same_sequence( + const counted_iterator& _Left, const counted_iterator<_Other>& _Right) noexcept { + // Per N4861 [counted.iterator]/2, two counted_iterators x and y refer to elements of the same sequence iff + // next(x.base(), x.count()) and next(y.base(), y.count()) "refer to the same element." Iterator equality is a + // fair proxy for this condition. + if constexpr (forward_iterator<_Iter> && forward_iterator<_Other>) { + using _CIter = common_type_t<_Iter, _Other>; + using _CDiff = common_type_t, iter_difference_t<_Other>>; + + const _CDiff _Diff = static_cast<_CDiff>(_Left._Length) - static_cast<_CDiff>(_Right.count()); + if (_Diff < 0) { + _STL_VERIFY( + static_cast<_CIter>(_Left._Current) == _RANGES next(static_cast<_CIter>(_Right.base()), -_Diff), + "counted_iterators from different ranges"); + } else { + _STL_VERIFY( + _RANGES next(static_cast<_CIter>(_Left._Current), _Diff) == static_cast<_CIter>(_Right.base()), + "counted_iterators from different ranges"); + } + } + } + + template _Other> + friend constexpr void _Verify_range(const counted_iterator& _Left, const counted_iterator<_Other>& _Right) { + if constexpr (_Range_verifiable_v<_Iter, _Other>) { + _Verify_range(_Left._Current, _Right._Current); + } +#if _ITERATOR_DEBUG_LEVEL != 0 + _Same_sequence(_Left, _Right); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + } + + constexpr void _Verify_offset(const iter_difference_t<_Iter> _Off) const { +#if _ITERATOR_DEBUG_LEVEL != 0 + _STL_VERIFY(_Off <= _Length, "Offset larger than counted_iterator length"); +#endif // _ITERATOR_DEBUG_LEVEL != 0 + if constexpr (_Offset_verifiable_v<_Iter>) { + _Current._Verify_offset(_Off); + } + } + + using _Prevent_inheriting_unwrap = counted_iterator; + + _NODISCARD constexpr counted_iterator<_Unwrapped_t> + _Unwrapped() const& requires _Unwrappable_v { + return static_cast>>(_Current._Unwrapped()); + } + + _NODISCARD constexpr counted_iterator<_Unwrapped_t<_Iter>> _Unwrapped() && requires _Unwrappable_v { + return static_cast>>(_STD move(_Current)._Unwrapped()); + } + + static constexpr bool _Unwrap_when_unverified = _Do_unwrap_when_unverified_v<_Iter>; + + // clang-format off + template + requires _Wrapped_seekable_v<_Iter, const _Other&> + constexpr void _Seek_to(const counted_iterator<_Other>& _It) { + // clang-format on + _Current._Seek_to(_It._Current); + _Length = _It._Length; + } + + // clang-format off + template + requires _Wrapped_seekable_v<_Iter, _Other> + constexpr void _Seek_to(counted_iterator<_Other>&& _It) { + // clang-format on + _Current._Seek_to(_STD move(_It)._Current); + _Length = _It._Length; + } + +private: + template + friend class counted_iterator; + + _Iter _Current{}; + iter_difference_t<_Iter> _Length = 0; +}; + +template +struct 
incrementable_traits> { + using difference_type = iter_difference_t<_Iter>; +}; + +template +struct iterator_traits> : iterator_traits<_Iter> { + using pointer = void; +}; +#endif // __cpp_lib_concepts + _STD_END _STDEXT_BEGIN diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index 8498f18c1a..467469cda4 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -11,6 +11,9 @@ #include #include +#if defined(_WIN64) && (_MSC_FULL_VER < 192829203) // TRANSITION +#include // Visual Studio 2019 to define 128-bit CAS in +#endif // defined(_WIN64) && (_MSC_FULL_VER < 192829203), TRANSITION #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) diff --git a/stl/inc/xatomic_wait.h b/stl/inc/xatomic_wait.h index 266aae0c9f..caeb3fdaf9 100644 --- a/stl/inc/xatomic_wait.h +++ b/stl/inc/xatomic_wait.h @@ -49,7 +49,7 @@ void __stdcall __std_atomic_notify_all_direct(const void* _Storage) noexcept; // of one of those sizes whose value changes upon notify, hence "indirect". (As of 2020-07-24, this always uses the // fallback SRWLOCK and CONDITION_VARIABLE implementation but that is not contractual.) using _Atomic_wait_indirect_equal_callback_t = bool(__stdcall*)( - const void* _Storage, void* _Comparand, size_t _Size, void* _Param) noexcept; + const void* _Storage, void* _Comparand, size_t _Size, void* _Param) _NOEXCEPT_FNPTR; int __stdcall __std_atomic_wait_indirect(const void* _Storage, void* _Comparand, size_t _Size, void* _Param, _Atomic_wait_indirect_equal_callback_t _Are_equal, unsigned long _Remaining_timeout) noexcept; diff --git a/stl/inc/yvals.h b/stl/inc/yvals.h index 1a70aa17ea..7943b6c484 100644 --- a/stl/inc/yvals.h +++ b/stl/inc/yvals.h @@ -208,6 +208,21 @@ _STL_DISABLE_CLANG_WARNINGS #define _STL_INTERNAL_STATIC_ASSERT(...) #endif // _ENABLE_STL_INTERNAL_CHECK +#ifndef _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK +#ifdef _DEBUG +#define _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK 1 +#else // ^^^ _DEBUG ^^^ // vvv !_DEBUG vvv +#define _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK 0 +#endif // _DEBUG +#endif // _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK + +#if _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK +#define _ATOMIC_REF_CHECK_ALIGNMENT(cond, mesg) _STL_VERIFY(cond, mesg) +#else +#define _ATOMIC_REF_CHECK_ALIGNMENT(cond, mesg) _Analysis_assume_(cond) +#endif + + #include #define _WARNING_MESSAGE(NUMBER, MESSAGE) __FILE__ "(" _CRT_STRINGIZE(__LINE__) "): warning " NUMBER ": " MESSAGE @@ -306,6 +321,22 @@ _STL_DISABLE_CLANG_WARNINGS #define _LOCK_DEBUG 3 #define _LOCK_AT_THREAD_EXIT 4 +#ifndef _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B +#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) +#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 1 +#else // ^^^ modern 64-bit // less modern or 32-bit vvv +#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 0 +#endif // _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B + +#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) +#error ARM64 requires _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B to be 1. +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) + +#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) +#error _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 requires 64-bit. 
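
The _ATOMIC_REF_CHECK_ALIGNMENT machinery above exists because atomic_ref only guarantees atomic access when the referenced object satisfies required_alignment, which for small power-of-two sizes is sizeof(T) rather than alignof(T). A hedged usage sketch of the C++20 interface (plain std::atomic_ref; the counter/worker names are illustrative):

#include <atomic>
#include <thread>
#include <vector>

int main() {
    // Over-align the plain object to meet atomic_ref's requirement; on x86,
    // alignof(long long) can be 4 while required_alignment is 8.
    alignas(std::atomic_ref<long long>::required_alignment) long long counter = 0;

    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i) {
        workers.emplace_back([&counter] {
            std::atomic_ref<long long> ref{counter}; // references the object, no copy
            for (int j = 0; j < 1000; ++j) {
                ref.fetch_add(1, std::memory_order_relaxed);
            }
        });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    // Every concurrent access went through atomic_ref, so reading the plain
    // object after the joins is well defined and yields 4000.
    return counter == 4000 ? 0 : 1;
}

In debug builds the alignment check above turns a misaligned reference into an _STL_VERIFY failure instead of silently non-atomic behavior.
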
+#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) + #ifdef __cplusplus _STD_BEGIN enum _Uninitialized { // tag for suppressing initialization diff --git a/stl/inc/yvals_core.h b/stl/inc/yvals_core.h index 3239d8eb25..3043b0eec6 100644 --- a/stl/inc/yvals_core.h +++ b/stl/inc/yvals_core.h @@ -131,6 +131,7 @@ // Other C++17 deprecation warnings // _HAS_CXX20 directly controls: +// P0019R8 atomic_ref // P0020R6 atomic, atomic, atomic // P0122R7 // P0202R3 constexpr For And exchange() @@ -147,6 +148,7 @@ // P0482R6 Library Support For char8_t // (mbrtoc8 and c8rtomb not yet implemented) // P0487R1 Fixing operator>>(basic_istream&, CharT*) +// P0528R3 Atomic Compare-And-Exchange With Padding Bits // P0550R2 remove_cvref // P0553R4 Rotating And Counting Functions // P0556R3 Integral Power-Of-2 Operations (renamed by P1956R1) @@ -170,7 +172,6 @@ // (partially implemented) // P0898R3 Standard Library Concepts // P0912R5 Library Support For Coroutines -// (partially implemented, missing noop coroutines) // P0919R3 Heterogeneous Lookup For Unordered Containers // P0966R1 string::reserve() Should Not Shrink // P1001R2 execution::unseq @@ -182,6 +183,9 @@ // (except the std::invoke function which is implemented in C++17) // P1085R2 Removing span Comparisons // P1115R3 erase()/erase_if() Return size_type +// P1123R0 Atomic Compare-And-Exchange With Padding Bits For atomic_ref +// P1135R6 The C++20 Synchronization Library +// (partially implemented) // P1207R4 Movability Of Single-Pass Iterators // (partially implemented) // P1209R0 erase_if(), erase() @@ -207,6 +211,7 @@ // P1907R2 ranges::ssize // P1956R1 has_single_bit(), bit_ceil(), bit_floor(), bit_width() // P1959R0 Removing weak_equality And strong_equality +// P1960R0 atomic_ref Cleanup // P1964R2 Replacing boolean With boolean-testable // P1976R2 Explicit Constructors For Fixed-Extent span From Dynamic-Extent Ranges // P2091R0 Fixing Issues With Range Access CPOs @@ -1137,6 +1142,7 @@ #define __cpp_lib_atomic_flag_test 201907L #define __cpp_lib_atomic_float 201711L #define __cpp_lib_atomic_lock_free_type_aliases 201907L +#define __cpp_lib_atomic_ref 201806L #define __cpp_lib_atomic_shared_ptr 201711L #define __cpp_lib_atomic_wait 201907L #define __cpp_lib_bind_front 201907L @@ -1162,8 +1168,12 @@ #define __cpp_lib_constexpr_tuple 201811L #define __cpp_lib_constexpr_utility 201811L -#ifdef __cpp_impl_coroutine // TRANSITION, VS 2019 16.8 Preview 3 -#define __cpp_lib_coroutine 197000L +#ifdef __cpp_impl_coroutine // TRANSITION, Clang and EDG coroutine support +#if __cpp_impl_coroutine >= 201902L +#define __cpp_lib_coroutine 201902L +#else // ^^^ __cpp_impl_coroutine >= 201902L ^^^ / vvv __cpp_impl_coroutine < 201902L vvv +#define __cpp_lib_coroutine 197000L // TRANSITION, VS 2019 16.8 Preview 4 +#endif // ^^^ __cpp_impl_coroutine < 201902L ^^^ #endif // __cpp_impl_coroutine #define __cpp_lib_destroying_delete 201806L @@ -1262,13 +1272,18 @@ compiler option, or define _ALLOW_RTCc_IN_STL to acknowledge that you have recei #error In yvals_core.h, defined(MRTDLL) implies defined(_M_CEE_PURE); !defined(_M_CEE_PURE) implies !defined(MRTDLL) #endif // defined(MRTDLL) && !defined(_M_CEE_PURE) -#define _STL_WIN32_WINNT_WINXP 0x0501 // _WIN32_WINNT_WINXP from sdkddkver.h -#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h -#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h +#define _STL_WIN32_WINNT_WINXP 0x0501 // _WIN32_WINNT_WINXP from sdkddkver.h +#define _STL_WIN32_WINNT_VISTA 0x0600 
// _WIN32_WINNT_VISTA from sdkddkver.h +#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h +#define _STL_WIN32_WINNT_WINBLUE 0x0603 // _WIN32_WINNT_WINBLUE from sdkddkver.h +#define _STL_WIN32_WINNT_WIN10 0x0A00 // _WIN32_WINNT_WIN10 from sdkddkver.h // Note that the STL DLL builds will set this to XP for ABI compatibility with VS2015 which supported XP. #ifndef _STL_WIN32_WINNT -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_ONECORE) || defined(_CRT_APP) +#if defined(_M_ARM64) +// The first ARM64 Windows was Windows 10 +#define _STL_WIN32_WINNT _STL_WIN32_WINNT_WIN10 +#elif defined(_M_ARM) || defined(_ONECORE) || defined(_CRT_APP) // The first ARM or OneCore or App Windows was Windows 8 #define _STL_WIN32_WINNT _STL_WIN32_WINNT_WIN8 #else // ^^^ default to Win8 // default to Vista vvv @@ -1277,5 +1292,11 @@ compiler option, or define _ALLOW_RTCc_IN_STL to acknowledge that you have recei #endif // ^^^ !defined(_M_ARM) && !defined(_M_ARM64) && !defined(_ONECORE) && !defined(_CRT_APP) ^^^ #endif // _STL_WIN32_WINNT +#ifdef __cpp_noexcept_function_type +#define _NOEXCEPT_FNPTR noexcept +#else +#define _NOEXCEPT_FNPTR +#endif // __cpp_noexcept_function_type + #endif // _STL_COMPILER_PREPROCESSOR #endif // _YVALS_CORE_H_ diff --git a/stl/src/atomic.cpp b/stl/src/atomic.cpp index b2c0eeb00b..8d1b83fc83 100644 --- a/stl/src/atomic.cpp +++ b/stl/src/atomic.cpp @@ -5,32 +5,21 @@ #include -#include -#pragma warning(disable : 4793) +#include + +namespace { + // MUTEX FOR shared_ptr ATOMIC OPERATIONS + SRWLOCK _Shared_ptr_lock = SRWLOCK_INIT; +} // unnamed namespace _EXTERN_C -// SPIN LOCK FOR shared_ptr ATOMIC OPERATIONS -volatile long _Shared_ptr_flag; - -_CRTIMP2_PURE void __cdecl _Lock_shared_ptr_spin_lock() { // spin until _Shared_ptr_flag successfully set -#ifdef _M_ARM - while (_InterlockedExchange_acq(&_Shared_ptr_flag, 1)) { - __yield(); - } -#else // _M_ARM - while (_interlockedbittestandset(&_Shared_ptr_flag, 0)) { // set bit 0 - } -#endif // _M_ARM +_CRTIMP2_PURE void __cdecl _Lock_shared_ptr_spin_lock() { // TRANSITION, ABI: "spin_lock" name is outdated + AcquireSRWLockExclusive(&_Shared_ptr_lock); } _CRTIMP2_PURE void __cdecl _Unlock_shared_ptr_spin_lock() { // release previously obtained lock -#ifdef _M_ARM - __dmb(_ARM_BARRIER_ISH); - __iso_volatile_store32(reinterpret_cast(&_Shared_ptr_flag), 0); -#else // _M_ARM - _interlockedbittestandreset(&_Shared_ptr_flag, 0); // reset bit 0 -#endif // _M_ARM + ReleaseSRWLockExclusive(&_Shared_ptr_lock); } _END_EXTERN_C diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index 0a45b12aee..9e6c9b19e2 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -198,6 +198,24 @@ namespace { } } #endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE + + _NODISCARD unsigned char __std_atomic_compare_exchange_128_fallback(_Inout_bytecount_(16) long long* _Destination, + _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, + _Inout_bytecount_(16) long long* _ComparandResult) noexcept { + static SRWLOCK _Mtx = SRWLOCK_INIT; + _SrwLock_guard _Guard{_Mtx}; + if (_Destination[0] == _ComparandResult[0] && _Destination[1] == _ComparandResult[1]) { + _ComparandResult[0] = _Destination[0]; + _ComparandResult[1] = _Destination[1]; + _Destination[0] = _ExchangeLow; + _Destination[1] = _ExchangeHigh; + return static_cast(true); + } else { + _ComparandResult[0] = _Destination[0]; + _ComparandResult[1] = _Destination[1]; + return static_cast(false); + } + } } // unnamed namespace @@ -339,4 +357,62 @@ 
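
When cmpxchg16b cannot be assumed, the separately compiled helpers fall back to ordinary locks: the 128-bit compare-exchange above serializes through an SRWLOCK, and __std_atomic_get_mutex below hands out a lock from a fixed table hashed by address so that unrelated atomics rarely contend. A portable sketch of that lock-striping idea, using std::mutex and illustrative names rather than the _Smtx_t internals:

#include <cstddef>
#include <cstdint>
#include <mutex>

// Map an object's address to one of 256 mutexes, in the spirit of
// __std_atomic_get_mutex. The real table aligns each entry to a cache line
// to avoid false sharing; that detail is omitted here.
std::mutex& lock_for_address(const void* key) noexcept {
    constexpr std::size_t table_size_power = 8;
    constexpr std::size_t table_size       = std::size_t{1} << table_size_power;
    static std::mutex table[table_size];

    auto index = reinterpret_cast<std::uintptr_t>(key);
    index ^= index >> (table_size_power * 2); // fold higher bits in, since the low
    index ^= index >> table_size_power;       // bits of aligned addresses repeat
    return table[index & (table_size - 1)];
}

// A 16-byte "compare-exchange" under the striped lock, shaped like the fallback
// above: report whether the comparand matched, and always hand the observed
// value back through comparand_result.
bool locked_cas_128(long long* destination, long long exchange_high, long long exchange_low,
    long long* comparand_result) noexcept {
    std::lock_guard guard{lock_for_address(destination)};
    const bool matched = destination[0] == comparand_result[0] && destination[1] == comparand_result[1];
    comparand_result[0] = destination[0];
    comparand_result[1] = destination[1];
    if (matched) {
        destination[0] = exchange_low;
        destination[1] = exchange_high;
    }
    return matched;
}
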
__std_atomic_api_level __stdcall __std_atomic_set_api_level(__std_atomic_api_lev return _Acquire_wait_functions(); #endif // !_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE } + +#pragma warning(push) +#pragma warning(disable : 4324) // structure was padded due to alignment specifier +_Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { + constexpr size_t _Table_size_power = 8; + constexpr size_t _Table_size = 1 << _Table_size_power; + constexpr size_t _Table_index_mask = _Table_size - 1; + + struct alignas(std::hardware_destructive_interference_size) _Table_entry { + _Smtx_t _Mutex; + }; + + static _Table_entry _Table[_Table_size]{}; + + auto _Index = reinterpret_cast(_Key); + _Index ^= _Index >> (_Table_size_power * 2); + _Index ^= _Index >> _Table_size_power; + return &_Table[_Index & _Table_index_mask]._Mutex; +} +#pragma warning(pop) + +_NODISCARD unsigned char __stdcall __std_atomic_compare_exchange_128(_Inout_bytecount_(16) long long* _Destination, + _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, + _Inout_bytecount_(16) long long* _ComparandResult) noexcept { +#if !defined(_WIN64) + return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); +#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 + return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); +#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv + if (__std_atomic_has_cmpxchg16b()) { + return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); + } + + return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); +#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 +} + +_NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept { +#if !defined(_WIN64) + return false; +#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 + return true; +#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv + constexpr char _Cmpxchg_Absent = 0; + constexpr char _Cmpxchg_Present = 1; + constexpr char _Cmpxchg_Unknown = 2; + + static std::atomic _Cached_value{_Cmpxchg_Unknown}; + + char _Value = _Cached_value.load(std::memory_order_relaxed); + if (_Value == _Cmpxchg_Unknown) { + _Value = IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) ? 
_Cmpxchg_Present : _Cmpxchg_Absent; + _Cached_value.store(_Value, std::memory_order_relaxed); + } + + return _Value; +#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 +} _END_EXTERN_C diff --git a/stl/src/msvcp_atomic_wait.src b/stl/src/msvcp_atomic_wait.src index ec335cc161..d8f2d843b5 100644 --- a/stl/src/msvcp_atomic_wait.src +++ b/stl/src/msvcp_atomic_wait.src @@ -6,14 +6,17 @@ LIBRARY LIBRARYNAME EXPORTS - __std_atomic_wait_get_deadline - __std_atomic_wait_get_remaining_timeout + __std_atomic_compare_exchange_128 + __std_atomic_get_mutex + __std_atomic_has_cmpxchg16b __std_atomic_notify_all_direct __std_atomic_notify_all_indirect __std_atomic_notify_one_direct __std_atomic_notify_one_indirect __std_atomic_set_api_level __std_atomic_wait_direct + __std_atomic_wait_get_deadline + __std_atomic_wait_get_remaining_timeout __std_atomic_wait_indirect __std_bulk_submit_threadpool_work __std_close_threadpool_work diff --git a/tests/libcxx/expected_results.txt b/tests/libcxx/expected_results.txt index 5d3dc61e13..97eb5036ed 100644 --- a/tests/libcxx/expected_results.txt +++ b/tests/libcxx/expected_results.txt @@ -49,6 +49,7 @@ std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_ std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp FAIL # libc++ doesn't yet implement P1423R3, so it expects an old value for `__cpp_lib_char8_t` +std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp FAIL std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp FAIL std/language.support/support.limits/support.limits.general/istream.version.pass.cpp FAIL std/language.support/support.limits/support.limits.general/limits.version.pass.cpp FAIL @@ -252,9 +253,6 @@ std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp SKI # *** MISSING STL FEATURES *** -# C++20 P0019R8 "atomic_ref" -std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp FAIL - # C++20 P0355R7 " Calendars And Time Zones" std/utilities/time/days.pass.cpp FAIL std/utilities/time/months.pass.cpp FAIL diff --git a/tests/libcxx/skipped_tests.txt b/tests/libcxx/skipped_tests.txt index 9d43be173d..0aceecdbb5 100644 --- a/tests/libcxx/skipped_tests.txt +++ b/tests/libcxx/skipped_tests.txt @@ -49,6 +49,7 @@ containers\unord\unord.map\unord.map.modifiers\insert_and_emplace_allocator_requ containers\unord\unord.set\insert_and_emplace_allocator_requirements.pass.cpp # libc++ doesn't yet implement P1423R3, so it expects an old value for `__cpp_lib_char8_t` +language.support\support.limits\support.limits.general\atomic.version.pass.cpp language.support\support.limits\support.limits.general\filesystem.version.pass.cpp language.support\support.limits\support.limits.general\istream.version.pass.cpp language.support\support.limits\support.limits.general\limits.version.pass.cpp @@ -252,9 +253,6 @@ utilities\memory\default.allocator\allocator.members\allocate.verify.cpp # *** MISSING STL FEATURES *** -# C++20 P0019R8 "atomic_ref" -language.support\support.limits\support.limits.general\atomic.version.pass.cpp - # C++20 P0355R7 " Calendars And Time Zones" utilities\time\days.pass.cpp utilities\time\months.pass.cpp diff --git a/tests/std/include/range_algorithm_support.hpp b/tests/std/include/range_algorithm_support.hpp index 1c03e882c5..7b71a423a0 100644 --- a/tests/std/include/range_algorithm_support.hpp +++ b/tests/std/include/range_algorithm_support.hpp @@ -340,8 +340,9 @@ namespace test { static constexpr 
bool at_least = derived_from; using ReferenceType = conditional_t, Element&>; - public: + using Consterator = iterator; + // output iterator operations iterator() = default; @@ -353,6 +354,10 @@ namespace test { return *this; } + constexpr operator Consterator() && noexcept { + return Consterator{exchange(ptr_, nullptr)}; + } + [[nodiscard]] constexpr Element* peek() const noexcept { return ptr_; } @@ -426,6 +431,12 @@ namespace test { // sentinel operations (implied by forward iterator): iterator(iterator const&) requires (to_bool(Eq)) = default; iterator& operator=(iterator const&) requires (to_bool(Eq)) = default; + + constexpr operator Consterator() const& noexcept + requires (to_bool(Eq)) { + return Consterator{ptr_}; + } + [[nodiscard]] constexpr boolish operator==(iterator const& that) const noexcept requires (to_bool(Eq)) { return {ptr_ == that.ptr_}; } diff --git a/tests/std/include/test_atomic_wait.hpp b/tests/std/include/test_atomic_wait.hpp index a856798f3a..a3d9b4471c 100644 --- a/tests/std/include/test_atomic_wait.hpp +++ b/tests/std/include/test_atomic_wait.hpp @@ -10,8 +10,8 @@ #include #include -template -void test_atomic_wait_func(const UnderlyingType old_value, const UnderlyingType new_value, +template
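
Returning to the <iterator> change earlier in this diff: counted_iterator bundles an iterator with a remaining length, compares equal to default_sentinel once that length reaches zero, and (per the incrementable_traits and iterator_traits specializations above) reports void as its pointer type. A brief usage sketch of the standard interface:

#include <algorithm>
#include <forward_list>
#include <iostream>
#include <iterator>

int main() {
    std::forward_list<int> values{1, 2, 3, 4, 5, 6};

    // Visit the first four elements of a range with no size or random access.
    std::counted_iterator first{values.begin(), 4};
    for (; first != std::default_sentinel; ++first) {
        std::cout << *first << ' '; // prints "1 2 3 4 "
    }
    std::cout << '\n';

    // Algorithms accept the same iterator/sentinel pair directly.
    std::counted_iterator again{values.begin(), 4};
    std::ranges::copy(again, std::default_sentinel, std::ostream_iterator<int>{std::cout, " "});
    std::cout << '\n';
}
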