From be1dabe47fff29850e294a2dca5b165f2e5f96ea Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 04:41:08 -0700 Subject: [PATCH 01/20] Simplify `__std_atomic_has_cmpxchg16b`: cmpxchg16b is always available for 64-bit Win8.1 / Server 2012 R2. https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent > PF_COMPARE_EXCHANGE128 > The atomic compare and exchange 128-bit operation (cmpxchg16b) is available. https://support.microsoft.com/en-us/windows/system-requirements-2f327e5a-2bae-4011-8848-58180a4353a7 > To install a 64-bit OS on a 64-bit PC, your processor needs to support CMPXCHG16b, PrefetchW, and LAHF/SAHF As mentioned by https://en.wikipedia.org/wiki/Windows_8 and confirmed by https://learn.microsoft.com/en-us/lifecycle/products/windows-8 https://learn.microsoft.com/en-us/lifecycle/products/windows-embedded-8-standard support for Win8.0 client ended in Jan 2016, with the exception of Windows Embedded 8 Standard which ended in Jul 2023. The one we have to worry about is Server 2012 classic, which is receiving extended security updates through Oct 2026: https://learn.microsoft.com/en-us/lifecycle/products/windows-server-2012 But only C++20 `atomic_ref<16 bytes>` is physically affected, and only on potato chips, and only for x64. (32-bit servers would be unaffected.) The potential impact is nil. Server 2012 classic was released in Sep 2012 and was the latest version for only one year, before Server 2012 R2 was released in Oct 2013. cmpxchg16b support appeared even earlier: in Intel Nehalem released in Nov 2008, and AMD Bulldozer released in Oct 2011. 
--- stl/src/atomic_wait.cpp | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index b9d907150b..733730941d 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -266,24 +266,10 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { } [[nodiscard]] char __stdcall __std_atomic_has_cmpxchg16b() noexcept { -#if !defined(_WIN64) - return false; -#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 +#ifdef _WIN64 return true; -#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv - constexpr char _Cmpxchg_Absent = 0; - constexpr char _Cmpxchg_Present = 1; - constexpr char _Cmpxchg_Unknown = 2; - - static std::atomic _Cached_value{_Cmpxchg_Unknown}; - - char _Value = _Cached_value.load(std::memory_order_relaxed); - if (_Value == _Cmpxchg_Unknown) { - _Value = IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) ? _Cmpxchg_Present : _Cmpxchg_Absent; - _Cached_value.store(_Value, std::memory_order_relaxed); - } - - return _Value; -#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ +#else + return false; +#endif } } // extern "C" From b16508d66213c9d81846b6023184e5b392b89b31 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 04:55:17 -0700 Subject: [PATCH 02/20] Simplify `__std_atomic_compare_exchange_128` accordingly. 
--- stl/src/atomic_wait.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index 733730941d..cd9608a801 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -252,17 +252,11 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { [[nodiscard]] unsigned char __stdcall __std_atomic_compare_exchange_128(_Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, _Inout_bytecount_(16) long long* _ComparandResult) noexcept { -#if !defined(_WIN64) - return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 +#ifdef _WIN64 return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv - if (__std_atomic_has_cmpxchg16b()) { - return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); - } - +#else return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ +#endif } [[nodiscard]] char __stdcall __std_atomic_has_cmpxchg16b() noexcept { From bad48148d4f3ec9f0527165534640e1d14ad5e76 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 05:20:55 -0700 Subject: [PATCH 03/20] `__std_atomic_compare_exchange_128` and `__std_atomic_has_cmpxchg16b` should `abort` for 32-bit. We can do this because they were declared in `<atomic>` for 64-bit only; there were never any 32-bit callers. (They have to remain dllexported for 32-bit, but that's all.) Include `<cstdlib>` for `abort` (which we were already calling in this file). 
We can drop `__std_atomic_compare_exchange_128_fallback` without ABI impact because it was defined in an unnamed namespace. --- stl/src/atomic_wait.cpp | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index cd9608a801..eb5c4922a9 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -90,24 +91,6 @@ namespace { } #endif // defined(_DEBUG) } - - [[nodiscard]] unsigned char __std_atomic_compare_exchange_128_fallback( - _Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, - _Inout_bytecount_(16) long long* _ComparandResult) noexcept { - static SRWLOCK _Mtx = SRWLOCK_INIT; - _SrwLock_guard _Guard{_Mtx}; - if (_Destination[0] == _ComparandResult[0] && _Destination[1] == _ComparandResult[1]) { - _ComparandResult[0] = _Destination[0]; - _ComparandResult[1] = _Destination[1]; - _Destination[0] = _ExchangeLow; - _Destination[1] = _ExchangeHigh; - return static_cast(true); - } else { - _ComparandResult[0] = _Destination[0]; - _ComparandResult[1] = _Destination[1]; - return static_cast(false); - } - } } // unnamed namespace extern "C" { @@ -255,7 +238,7 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { #ifdef _WIN64 return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); #else - return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); + _CSTD abort(); #endif } @@ -263,7 +246,7 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { #ifdef _WIN64 return true; #else - return false; + _CSTD abort(); #endif } } // extern "C" From 7bf2aa7f11ba75bb5fdf44db2ba61312a0ff2a15 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Sun, 16 Jun 2024 05:57:18 -0700 Subject: [PATCH 04/20] Preserve `__std_atomic_compare_exchange_128` for bincompat. Drop the declaration, mark the definition as preserved for bincompat. It's still dllexported by `stl/src/msvcp_atomic_wait.src`. Directly use the `_InterlockedCompareExchange128` intrinsic. --- stl/inc/atomic | 5 +---- stl/src/atomic_wait.cpp | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 307168b01e..5613b80d04 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -37,11 +37,8 @@ _STL_DISABLE_CLANG_WARNINGS // synchronization object shared by all 16-byte atomics. // (Note: machines without this instruction typically have 2 cores or fewer, so this isn't too bad) // All pointer parameters must be 16-byte aligned. -extern "C" _NODISCARD unsigned char __stdcall __std_atomic_compare_exchange_128( - _Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, - _Inout_bytecount_(16) long long* _ComparandResult) noexcept; extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; -#define _STD_COMPARE_EXCHANGE_128 __std_atomic_compare_exchange_128 +#define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128 #endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ #endif // defined(_WIN64) diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index eb5c4922a9..15ad2c682a 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -232,6 +232,7 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { } #pragma warning(pop) +// TRANSITION, ABI: preserved for binary compatibility [[nodiscard]] unsigned char __stdcall __std_atomic_compare_exchange_128(_Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, _Inout_bytecount_(16) long long* _ComparandResult) noexcept { From e3a9a916569ea10410c0506cb8b271450195718a Mon Sep 17 00:00:00 2001 
From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 06:04:43 -0700 Subject: [PATCH 05/20] Replace the `_STD_COMPARE_EXCHANGE_128` macro with `_InterlockedCompareExchange128`. --- stl/inc/atomic | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 5613b80d04..fa50550087 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -30,7 +30,6 @@ _STL_DISABLE_CLANG_WARNINGS #ifdef _WIN64 #if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 -#define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128 #else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv // 16-byte atomics are separately compiled for x64, as not all x64 hardware has the cmpxchg16b // instruction; in the event this instruction is not available, the fallback is a global @@ -38,7 +37,6 @@ _STL_DISABLE_CLANG_WARNINGS // (Note: machines without this instruction typically have 2 cores or fewer, so this isn't too bad) // All pointer parameters must be 16-byte aligned. 
extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; -#define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128 #endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ #endif // defined(_WIN64) @@ -600,7 +598,7 @@ inline bool __stdcall _Atomic_wait_compare_16_bytes(const void* _Storage, void* const auto _Cmp = static_cast(_Comparand); alignas(16) long long _Tmp[2] = {_Cmp[0], _Cmp[1]}; #if defined(_M_X64) && !defined(_M_ARM64EC) - return _STD_COMPARE_EXCHANGE_128(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; + return _InterlockedCompareExchange128(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; #else // ^^^ _M_X64 / ARM64, _M_ARM64EC vvv return _InterlockedCompareExchange128_nf(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; #endif // ^^^ ARM64, _M_ARM64EC ^^^ @@ -1196,7 +1194,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics _NODISCARD _TVal load() const noexcept { // load with sequential consistency long long* const _Storage_ptr = const_cast(_STD _Atomic_address_as(_Storage)); _Int128 _Result{}; // atomic CAS 0 with 0 - (void) _STD_COMPARE_EXCHANGE_128(_Storage_ptr, 0, 0, &_Result._Low); + (void) _InterlockedCompareExchange128(_Storage_ptr, 0, 0, &_Result._Low); return reinterpret_cast<_TVal&>(_Result); } @@ -1267,7 +1265,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics &_Expected_temp._Low); #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv (void) _Order; - _Result = _STD_COMPARE_EXCHANGE_128(&reinterpret_cast(_Storage), _Desired_bytes._High, + _Result = _InterlockedCompareExchange128(&reinterpret_cast(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #endif // ^^^ _M_X64 ^^^ if (_Result) { @@ -1293,7 +1291,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics &_Expected_temp._Low); #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv (void) _Order; - _Result = _STD_COMPARE_EXCHANGE_128( + _Result = _InterlockedCompareExchange128( &reinterpret_cast(_Storage), 
_Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #endif // ^^^ _M_X64 ^^^ if (_Result == 0) { @@ -3053,7 +3051,6 @@ _STD_END #undef __STORE_RELEASE #undef _STD_ATOMIC_USE_ARM64_LDAR_STLR -#undef _STD_COMPARE_EXCHANGE_128 #undef _INVALID_MEMORY_ORDER #pragma pop_macro("new") From 60798a31df0e8fc69846777dd723d8ab8c414ef3 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 09:21:44 -0700 Subject: [PATCH 06/20] Allow Clang x64 to use `_InterlockedCompareExchange128`. --- stl/inc/atomic | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/stl/inc/atomic b/stl/inc/atomic index fa50550087..947d41077a 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -47,6 +47,11 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; #define _ATOMIC_HAS_DCAS 0 #endif // ^^^ We only sometimes have DCAS ^^^ +// Allow _InterlockedCompareExchange128 to be used: +#if defined(__clang__) && defined(_M_X64) +#pragma clang attribute _STD_ATOMIC_HEADER.push([[gnu::target("cx16")]], apply_to = function) +#endif // ^^^ defined(__clang__) && defined(_M_X64) ^^^ + // Controls whether ARM64 ldar/ldapr/stlr should be used #ifndef _STD_ATOMIC_USE_ARM64_LDAR_STLR #if defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64) @@ -3053,6 +3058,10 @@ _STD_END #undef _INVALID_MEMORY_ORDER +#if defined(__clang__) && defined(_M_X64) +#pragma clang attribute _STD_ATOMIC_HEADER.pop +#endif // ^^^ defined(__clang__) && defined(_M_X64) ^^^ + #pragma pop_macro("new") _STL_RESTORE_CLANG_WARNINGS #pragma warning(pop) From c7261efc97a2d5703f85343ca5d725ea62c0b941 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 06:46:06 -0700 Subject: [PATCH 07/20] Simplify the "future ABI" `atomic::is_lock_free()`. Now that `__std_atomic_has_cmpxchg16b()` always returns `true`, this collapses into the `_ATOMIC_HAS_DCAS` case. 
--- stl/inc/atomic | 4 ---- 1 file changed, 4 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 947d41077a..de9507655c 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2184,11 +2184,7 @@ public: #else // ^^^ don't break ABI / break ABI vvv _NODISCARD bool is_lock_free() const volatile noexcept { -#if _ATOMIC_HAS_DCAS return sizeof(_Ty) <= 2 * sizeof(void*); -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - return sizeof(_Ty) <= sizeof(void*) || (sizeof(_Ty) <= 2 * sizeof(void*) && __std_atomic_has_cmpxchg16b()); -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ } #endif // ^^^ break ABI ^^^ From 72349d3c4a6f470997a01463b8e2f1f96a817230 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 06:53:34 -0700 Subject: [PATCH 08/20] Major style: Add parens when mixing arithmetic and bitwise operators in "current ABI" `atomic::is_lock_free()`. --- stl/inc/atomic | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index de9507655c..0c81bb6e19 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2177,7 +2177,7 @@ public: #if 1 // TRANSITION, ABI _NODISCARD bool is_lock_free() const volatile noexcept { - constexpr bool _Result = sizeof(_Ty) <= 8 && (sizeof(_Ty) & sizeof(_Ty) - 1) == 0; + constexpr bool _Result = sizeof(_Ty) <= 8 && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; return _Result; } From 6411c9750bb0ed4cdf19689b0143bf98cfb61594 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 06:56:20 -0700 Subject: [PATCH 09/20] Minor style: Simplify `atomic::is_lock_free()`. We don't need to replicate the signature, nor does `constexpr bool _Result` improve clarity. 
--- stl/inc/atomic | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 0c81bb6e19..788ac3cff2 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2175,18 +2175,13 @@ public: static constexpr bool is_always_lock_free = _Is_always_lock_free; #endif // _HAS_CXX17 -#if 1 // TRANSITION, ABI _NODISCARD bool is_lock_free() const volatile noexcept { - constexpr bool _Result = sizeof(_Ty) <= 8 && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; - return _Result; - } - +#if 1 // TRANSITION, ABI + return sizeof(_Ty) <= 8 && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; #else // ^^^ don't break ABI / break ABI vvv - - _NODISCARD bool is_lock_free() const volatile noexcept { return sizeof(_Ty) <= 2 * sizeof(void*); - } #endif // ^^^ break ABI ^^^ + } _NODISCARD bool is_lock_free() const noexcept { return static_cast(this)->is_lock_free(); From b855bb90e96c41c6d65b238b4215848297da80b5 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 07:37:32 -0700 Subject: [PATCH 10/20] Enhance the "future ABI" `_Is_always_lock_free` (aka `atomic::is_always_lock_free`). `atomic::is_lock_free()` collapsed into the `_ATOMIC_HAS_DCAS` case, so this does too. 
--- stl/inc/atomic | 5 ----- 1 file changed, 5 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 788ac3cff2..afc6cbeb8d 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -1649,13 +1649,8 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper template constexpr bool _Is_always_lock_free = _TypeSize <= 8 && (_TypeSize & (_TypeSize - 1)) == 0; #else // ^^^ don't break ABI / break ABI vvv -#if _ATOMIC_HAS_DCAS template constexpr bool _Is_always_lock_free = _TypeSize <= 2 * sizeof(void*); -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv -template -constexpr bool _Is_always_lock_free = _TypeSize <= sizeof(void*); -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ #endif // ^^^ break ABI ^^^ template > From ea085c53be6dfaa9d4a321a08680d6691cf4608d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 07:42:32 -0700 Subject: [PATCH 11/20] Centralize `atomic::is_lock_free()` and `_Is_always_lock_free`. --- stl/inc/atomic | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index afc6cbeb8d..80088675a4 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2171,11 +2171,7 @@ public: #endif // _HAS_CXX17 _NODISCARD bool is_lock_free() const volatile noexcept { -#if 1 // TRANSITION, ABI - return sizeof(_Ty) <= 8 && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; -#else // ^^^ don't break ABI / break ABI vvv - return sizeof(_Ty) <= 2 * sizeof(void*); -#endif // ^^^ break ABI ^^^ + return _Is_always_lock_free; } _NODISCARD bool is_lock_free() const noexcept { From c468e9e2cabf072294fc52610e004fc392825a37 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 07:26:15 -0700 Subject: [PATCH 12/20] `atomic_ref::is_lock_free()`: Simplify. `__std_atomic_has_cmpxchg16b()` always returns `true` now. 
--- stl/inc/atomic | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 80088675a4..f95dbbdaad 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2353,7 +2353,7 @@ public: if constexpr (is_always_lock_free) { return true; } else if constexpr (_Is_potentially_lock_free) { - return __std_atomic_has_cmpxchg16b() != 0; + return true; } else { return false; } From 3861e48f6c78a6640db2e401f3dae80d7a7ec8cf Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 07:52:53 -0700 Subject: [PATCH 13/20] `atomic_ref::is_lock_free()`: Further simplify. For `!_ATOMIC_HAS_DCAS`: `is_always_lock_free` (power of two, at most 1 pointer) is a strict subset of `_Is_potentially_lock_free` (power of two, at most 2 pointers). --- stl/inc/atomic | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index f95dbbdaad..9c282e2196 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2350,13 +2350,7 @@ public: #if _ATOMIC_HAS_DCAS return is_always_lock_free; #else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - if constexpr (is_always_lock_free) { - return true; - } else if constexpr (_Is_potentially_lock_free) { - return true; - } else { - return false; - } + return _Is_potentially_lock_free; #endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ } From 0e17494719220a0f1b5dc6c4138a27fa264775e3 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 07:57:16 -0700 Subject: [PATCH 14/20] Enhance `atomic_ref::is_always_lock_free`. This describes what `atomic_ref::is_lock_free()` now returns. 
--- stl/inc/atomic | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 9c282e2196..90148c8c1f 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2341,7 +2341,7 @@ public: #if _ATOMIC_HAS_DCAS _Is_potentially_lock_free; #else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - _Is_potentially_lock_free && sizeof(_Ty) <= sizeof(void*); + _Is_potentially_lock_free; #endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ static constexpr size_t required_alignment = _Is_potentially_lock_free ? sizeof(_Ty) : alignof(_Ty); From fe82b0c4dabc16f49a6a4e8d32d3bc681c804716 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 08:01:06 -0700 Subject: [PATCH 15/20] Collapse `atomic_ref` `is_always_lock_free` and `is_lock_free()`. Again, this all collapses into the `_ATOMIC_HAS_DCAS` case. --- stl/inc/atomic | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 90148c8c1f..64d94facff 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2323,7 +2323,7 @@ public: using value_type = _Ty; explicit atomic_ref(_Ty& _Value) noexcept /* strengthened */ : _Base(_Value) { - if constexpr (_Is_potentially_lock_free) { + if constexpr (is_always_lock_free) { _Check_alignment(_Value); } else { this->_Init_spinlock_for_ref(); @@ -2334,24 +2334,13 @@ public: atomic_ref& operator=(const atomic_ref&) = delete; - static constexpr bool _Is_potentially_lock_free = - sizeof(_Ty) <= 2 * sizeof(void*) && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; - static constexpr bool is_always_lock_free = -#if _ATOMIC_HAS_DCAS - _Is_potentially_lock_free; -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - _Is_potentially_lock_free; -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ + sizeof(_Ty) <= 2 * sizeof(void*) && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; - static constexpr size_t required_alignment = _Is_potentially_lock_free ? 
sizeof(_Ty) : alignof(_Ty); + static constexpr size_t required_alignment = is_always_lock_free ? sizeof(_Ty) : alignof(_Ty); _NODISCARD bool is_lock_free() const noexcept { -#if _ATOMIC_HAS_DCAS return is_always_lock_free; -#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv - return _Is_potentially_lock_free; -#endif // ^^^ !_ATOMIC_HAS_DCAS ^^^ } void store(const _Ty _Value) const noexcept { From 0c46d60c546a7c03502d01b5c4e30742cd34ad45 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 08:06:43 -0700 Subject: [PATCH 16/20] `_ATOMIC_HAS_DCAS` is now unused. --- stl/inc/atomic | 8 -------- 1 file changed, 8 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 64d94facff..64d26bc1d9 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -40,13 +40,6 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; #endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ #endif // defined(_WIN64) -// Controls whether atomic::is_always_lock_free triggers for sizeof(void *) or 2 * sizeof(void *) -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64) || defined(_M_ARM64EC) -#define _ATOMIC_HAS_DCAS 1 -#else // ^^^ We always have DCAS / We only sometimes have DCAS vvv -#define _ATOMIC_HAS_DCAS 0 -#endif // ^^^ We only sometimes have DCAS ^^^ - // Allow _InterlockedCompareExchange128 to be used: #if defined(__clang__) && defined(_M_X64) #pragma clang attribute _STD_ATOMIC_HEADER.push([[gnu::target("cx16")]], apply_to = function) @@ -3014,7 +3007,6 @@ _STD_END #undef _ATOMIC_STORE_32_SEQ_CST #undef _ATOMIC_STORE_64_SEQ_CST #undef _ATOMIC_STORE_64_SEQ_CST_IX86 -#undef _ATOMIC_HAS_DCAS #undef _ATOMIC_STORE_SEQ_CST_ARM64 #undef __LOAD_ACQUIRE_ARM64 #undef _ATOMIC_LOAD_ARM64 From ba77cd116f217eb49822ee4b2b05963ad5deebc6 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 16 Jun 2024 08:10:24 -0700 Subject: [PATCH 17/20] Preserve `__std_atomic_has_cmpxchg16b` for bincompat. 
Drop the declaration, mark the definition as preserved for bincompat. It's still dllexported by `stl/src/msvcp_atomic_wait.src`. --- stl/inc/atomic | 12 ------------ stl/src/atomic_wait.cpp | 1 + 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 64d26bc1d9..69bb22d98a 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -28,18 +28,6 @@ _STL_DISABLE_CLANG_WARNINGS #pragma push_macro("new") #undef new -#ifdef _WIN64 -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 -#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 / _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv -// 16-byte atomics are separately compiled for x64, as not all x64 hardware has the cmpxchg16b -// instruction; in the event this instruction is not available, the fallback is a global -// synchronization object shared by all 16-byte atomics. -// (Note: machines without this instruction typically have 2 cores or fewer, so this isn't too bad) -// All pointer parameters must be 16-byte aligned. -extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; -#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 ^^^ -#endif // defined(_WIN64) - // Allow _InterlockedCompareExchange128 to be used: #if defined(__clang__) && defined(_M_X64) #pragma clang attribute _STD_ATOMIC_HEADER.push([[gnu::target("cx16")]], apply_to = function) diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index 15ad2c682a..bee45e7f14 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -243,6 +243,7 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { #endif } +// TRANSITION, ABI: preserved for binary compatibility [[nodiscard]] char __stdcall __std_atomic_has_cmpxchg16b() noexcept { #ifdef _WIN64 return true; From 96d86e7694d874aa70b0aeef8f933e8f1d3d72a8 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Sun, 16 Jun 2024 08:27:18 -0700 Subject: [PATCH 18/20] Drop `_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B` and simplify test coverage. It's now unused by product code, so we can drop its definition and consistency checks. Then, `P0019R8_atomic_ref` can assume a single mode. --- stl/inc/yvals.h | 16 ---------------- tests/std/tests/P0019R8_atomic_ref/env.lst | 3 --- tests/std/tests/P0019R8_atomic_ref/test.cpp | 13 ------------- 3 files changed, 32 deletions(-) diff --git a/stl/inc/yvals.h b/stl/inc/yvals.h index d2201390f5..33ac7c5120 100644 --- a/stl/inc/yvals.h +++ b/stl/inc/yvals.h @@ -315,22 +315,6 @@ _EMIT_STL_WARNING(STL4001, "/clr:pure is deprecated and will be REMOVED."); #define _LOCK_STREAM 2 #define _LOCK_DEBUG 3 -#ifndef _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B -#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) -#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 1 -#else // ^^^ modern 64-bit / less modern or 32-bit vvv -#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 0 -#endif // _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) -#endif // !defined(_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B) - -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) -#error ARM64 requires _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B to be 1. -#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) - -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) -#error _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 requires 64-bit. 
-#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) - _STD_BEGIN enum _Uninitialized { // tag for suppressing initialization _Noinit diff --git a/tests/std/tests/P0019R8_atomic_ref/env.lst b/tests/std/tests/P0019R8_atomic_ref/env.lst index b3f3ea4864..351a8293d9 100644 --- a/tests/std/tests/P0019R8_atomic_ref/env.lst +++ b/tests/std/tests/P0019R8_atomic_ref/env.lst @@ -2,6 +2,3 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception RUNALL_INCLUDE ..\usual_20_matrix.lst -RUNALL_CROSSLIST -* PM_CL="" -* PM_CL="/D_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B=1 /DTEST_CMPXCHG16B" diff --git a/tests/std/tests/P0019R8_atomic_ref/test.cpp b/tests/std/tests/P0019R8_atomic_ref/test.cpp index 6cdfd4518a..8edf08cfd9 100644 --- a/tests/std/tests/P0019R8_atomic_ref/test.cpp +++ b/tests/std/tests/P0019R8_atomic_ref/test.cpp @@ -1,12 +1,6 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#if defined(TEST_CMPXCHG16B) && (defined(__clang__) || !defined(_M_X64)) -// Skip Clang because it would require the -mcx16 compiler option. -// Skip non-x64 because _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B is always 1 for ARM64, and is forbidden to be 1 for 32-bit. -int main() {} -#else // ^^^ skip test / run test vvv - #include #include #include @@ -436,13 +430,8 @@ void test_gh_4472() { static_assert(std::atomic_ref::required_alignment == sizeof(two_pointers_t)); -#ifdef _WIN64 - static_assert(std::atomic_ref::is_always_lock_free == _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B); -#else static_assert(std::atomic_ref::is_always_lock_free); -#endif - // We expect tests to run on machines that support DCAS, which is required by Win8+. std::atomic_ref ar{two_pointers}; assert(ar.is_lock_free()); } @@ -518,5 +507,3 @@ int main() { test_gh_4472(); test_gh_4728(); } - -#endif // ^^^ run test ^^^ From 537bc95e6bef94a023818f724b8bb2c38546f172 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Mon, 24 Jun 2024 23:15:42 -0700 Subject: [PATCH 19/20] `_STL_WIN32_WINNT_WINBLUE` is now unused. --- stl/inc/yvals_core.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/stl/inc/yvals_core.h b/stl/inc/yvals_core.h index 7a7f6d007e..4b041681a9 100644 --- a/stl/inc/yvals_core.h +++ b/stl/inc/yvals_core.h @@ -1941,10 +1941,9 @@ compiler option, or define _ALLOW_RTCc_IN_STL to suppress this error. #error In yvals_core.h, defined(MRTDLL) implies defined(_M_CEE_PURE); !defined(_M_CEE_PURE) implies !defined(MRTDLL) #endif // defined(MRTDLL) && !defined(_M_CEE_PURE) -#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h -#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h -#define _STL_WIN32_WINNT_WINBLUE 0x0603 // _WIN32_WINNT_WINBLUE from sdkddkver.h -#define _STL_WIN32_WINNT_WIN10 0x0A00 // _WIN32_WINNT_WIN10 from sdkddkver.h +#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h +#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h +#define _STL_WIN32_WINNT_WIN10 0x0A00 // _WIN32_WINNT_WIN10 from sdkddkver.h // Note that the STL DLL builds will set this to XP for ABI compatibility with VS2015 which supported XP. #ifndef _STL_WIN32_WINNT From e13d67908b216b8f7efab5d710c462c5267b2a59 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 28 Jun 2024 15:30:15 -0700 Subject: [PATCH 20/20] Silence unreferenced parameter warnings for 32-bit. 
--- stl/src/atomic_wait.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index bee45e7f14..4bafb1a86b 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -238,9 +238,13 @@ _Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept { _Inout_bytecount_(16) long long* _ComparandResult) noexcept { #ifdef _WIN64 return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult); -#else +#else // ^^^ 64-bit / 32-bit vvv + (void) _Destination; + (void) _ExchangeHigh; + (void) _ExchangeLow; + (void) _ComparandResult; _CSTD abort(); -#endif +#endif // ^^^ 32-bit ^^^ } // TRANSITION, ABI: preserved for binary compatibility