From e99c681c95b4cb790c2220b41cc07074ffd43616 Mon Sep 17 00:00:00 2001 From: Geoffry Song Date: Wed, 21 Aug 2024 14:37:39 -0700 Subject: [PATCH 01/10] Clean up cfg-gating of ProcessPrng extern --- std/src/sys/pal/windows/c.rs | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/std/src/sys/pal/windows/c.rs b/std/src/sys/pal/windows/c.rs index 2f5d75dc4bc23..8900410cf609b 100644 --- a/std/src/sys/pal/windows/c.rs +++ b/std/src/sys/pal/windows/c.rs @@ -109,19 +109,22 @@ if #[cfg(not(target_vendor = "uwp"))] { } // Use raw-dylib to import ProcessPrng as we can't rely on there being an import library. -cfg_if::cfg_if! { -if #[cfg(not(target_vendor = "win7"))] { - #[cfg(target_arch = "x86")] - #[link(name = "bcryptprimitives", kind = "raw-dylib", import_name_type = "undecorated")] - extern "system" { - pub fn ProcessPrng(pbdata: *mut u8, cbdata: usize) -> BOOL; - } - #[cfg(not(target_arch = "x86"))] - #[link(name = "bcryptprimitives", kind = "raw-dylib")] - extern "system" { - pub fn ProcessPrng(pbdata: *mut u8, cbdata: usize) -> BOOL; - } -}} +#[cfg(not(target_vendor = "win7"))] +#[cfg_attr( + target_arch = "x86", + link( + name = "bcryptprimitives", + kind = "raw-dylib", + import_name_type = "undecorated" + ) +)] +#[cfg_attr( + not(target_arch = "x86"), + link(name = "bcryptprimitives", kind = "raw-dylib") +)] +extern "system" { + pub fn ProcessPrng(pbdata: *mut u8, cbdata: usize) -> BOOL; +} // Functions that aren't available on every version of Windows that we support, // but we still use them and just provide some form of a fallback implementation. From b51f35e9d4748a521965a2ae40788ab81375b3ae Mon Sep 17 00:00:00 2001 From: Geoffry Song Date: Wed, 21 Aug 2024 15:34:51 -0700 Subject: [PATCH 02/10] format --- std/src/sys/pal/windows/c.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/std/src/sys/pal/windows/c.rs b/std/src/sys/pal/windows/c.rs index 8900410cf609b..b888eb7d95ca3 100644 --- a/std/src/sys/pal/windows/c.rs +++ b/std/src/sys/pal/windows/c.rs @@ -112,16 +112,9 @@ if #[cfg(not(target_vendor = "uwp"))] { #[cfg(not(target_vendor = "win7"))] #[cfg_attr( target_arch = "x86", - link( - name = "bcryptprimitives", - kind = "raw-dylib", - import_name_type = "undecorated" - ) -)] -#[cfg_attr( - not(target_arch = "x86"), - link(name = "bcryptprimitives", kind = "raw-dylib") + link(name = "bcryptprimitives", kind = "raw-dylib", import_name_type = "undecorated") )] +#[cfg_attr(not(target_arch = "x86"), link(name = "bcryptprimitives", kind = "raw-dylib"))] extern "system" { pub fn ProcessPrng(pbdata: *mut u8, cbdata: usize) -> BOOL; } From 012bb44c6c589af133dc96472f2e73349efff2c3 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Fri, 14 Jun 2024 18:05:09 +0100 Subject: [PATCH 03/10] rustc_target: Add various aarch64 features Add various aarch64 features already supported by LLVM and Linux. The features are marked as unstable using a newly added symbol, i.e. aarch64_unstable_target_feature. Additionally include some comment fixes to ensure consistency of feature names with the Arm ARM and support for architecture version target features up to v9.5a. This commit adds compiler support for the following features: - FEAT_CSSC - FEAT_ECV - FEAT_FAMINMAX - FEAT_FLAGM2 - FEAT_FP8 - FEAT_FP8DOT2 - FEAT_FP8DOT4 - FEAT_FP8FMA - FEAT_FPMR - FEAT_HBC - FEAT_LSE128 - FEAT_LSE2 - FEAT_LUT - FEAT_MOPS - FEAT_LRCPC3 - FEAT_SVE_B16B16 - FEAT_SVE2p1 - FEAT_WFxT --- std/tests/run-time-detect.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/std/tests/run-time-detect.rs b/std/tests/run-time-detect.rs index 6948670565662..779f0d1a9b892 100644 --- a/std/tests/run-time-detect.rs +++ b/std/tests/run-time-detect.rs @@ -4,6 +4,10 @@ all(target_arch = "arm", any(target_os = "linux", target_os = "android")), feature(stdarch_arm_feature_detection) )] +#![cfg_attr( + all(target_arch = "aarch64", any(target_os = "linux", target_os = "android")), + feature(stdarch_aarch64_feature_detection) +)] #![cfg_attr( all(target_arch = "powerpc", target_os = "linux"), feature(stdarch_powerpc_feature_detection) @@ -36,21 +40,34 @@ fn aarch64_linux() { println!("bf16: {}", is_aarch64_feature_detected!("bf16")); println!("bti: {}", is_aarch64_feature_detected!("bti")); println!("crc: {}", is_aarch64_feature_detected!("crc")); + println!("cssc: {}", is_aarch64_feature_detected!("cssc")); println!("dit: {}", is_aarch64_feature_detected!("dit")); println!("dotprod: {}", is_aarch64_feature_detected!("dotprod")); println!("dpb2: {}", is_aarch64_feature_detected!("dpb2")); println!("dpb: {}", is_aarch64_feature_detected!("dpb")); + println!("ecv: {}", is_aarch64_feature_detected!("ecv")); println!("f32mm: {}", is_aarch64_feature_detected!("f32mm")); println!("f64mm: {}", is_aarch64_feature_detected!("f64mm")); + println!("faminmax: {}", is_aarch64_feature_detected!("faminmax")); println!("fcma: {}", is_aarch64_feature_detected!("fcma")); println!("fhm: {}", is_aarch64_feature_detected!("fhm")); + println!("flagm2: {}", is_aarch64_feature_detected!("flagm2")); println!("flagm: {}", is_aarch64_feature_detected!("flagm")); println!("fp16: {}", is_aarch64_feature_detected!("fp16")); + println!("fp8: {}", is_aarch64_feature_detected!("fp8")); + println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2")); + println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4")); + println!("fp8fma: {}", is_aarch64_feature_detected!("fp8fma")); + println!("fpmr: {}", is_aarch64_feature_detected!("fpmr")); println!("frintts: {}", is_aarch64_feature_detected!("frintts")); + println!("hbc: {}", is_aarch64_feature_detected!("hbc")); println!("i8mm: {}", is_aarch64_feature_detected!("i8mm")); println!("jsconv: {}", is_aarch64_feature_detected!("jsconv")); + println!("lse128: {}", is_aarch64_feature_detected!("lse128")); println!("lse2: {}", is_aarch64_feature_detected!("lse2")); println!("lse: {}", is_aarch64_feature_detected!("lse")); + println!("lut: {}", is_aarch64_feature_detected!("lut")); + println!("mops: {}", is_aarch64_feature_detected!("mops")); println!("mte: {}", is_aarch64_feature_detected!("mte")); println!("neon: {}", is_aarch64_feature_detected!("neon")); println!("paca: {}", is_aarch64_feature_detected!("paca")); @@ -58,6 +75,7 @@ fn aarch64_linux() { println!("pmull: {}", is_aarch64_feature_detected!("pmull")); println!("rand: {}", is_aarch64_feature_detected!("rand")); println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2")); + println!("rcpc3: {}", is_aarch64_feature_detected!("rcpc3")); println!("rcpc: {}", is_aarch64_feature_detected!("rcpc")); println!("rdm: {}", is_aarch64_feature_detected!("rdm")); println!("sb: {}", is_aarch64_feature_detected!("sb")); @@ -65,13 +83,16 @@ fn aarch64_linux() { println!("sha3: {}", is_aarch64_feature_detected!("sha3")); println!("sm4: {}", is_aarch64_feature_detected!("sm4")); println!("ssbs: {}", is_aarch64_feature_detected!("ssbs")); + println!("sve-b16b16: {}", is_aarch64_feature_detected!("sve-b16b16")); println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes")); println!("sve2-bitperm: {}", is_aarch64_feature_detected!("sve2-bitperm")); println!("sve2-sha3: {}", is_aarch64_feature_detected!("sve2-sha3")); println!("sve2-sm4: {}", is_aarch64_feature_detected!("sve2-sm4")); println!("sve2: {}", is_aarch64_feature_detected!("sve2")); + println!("sve2p1: {}", is_aarch64_feature_detected!("sve2p1")); println!("sve: {}", is_aarch64_feature_detected!("sve")); println!("tme: {}", is_aarch64_feature_detected!("tme")); + println!("wfxt: {}", is_aarch64_feature_detected!("wfxt")); // tidy-alphabetical-end } From 4f6b8149292f3beee6d84d798e2004803ced702c Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Mon, 17 Jun 2024 15:37:46 +0100 Subject: [PATCH 04/10] rustc_target: Add SME aarch64 features Add SME aarch64 features already supported by LLVM and Linux. This commit adds compiler support for the following features: - FEAT_SME - FEAT_SME_F16F16 - FEAT_SME_F64F64 - FEAT_SME_F8F16 - FEAT_SME_F8F32 - FEAT_SME_FA64 - FEAT_SME_I16I64 - FEAT_SME_LUTv2 - FEAT_SME2 - FEAT_SME2p1 - FEAT_SSVE_FP8DOT2 - FEAT_SSVE_FP8DOT4 - FEAT_SSVE_FP8FMA --- std/tests/run-time-detect.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/std/tests/run-time-detect.rs b/std/tests/run-time-detect.rs index 779f0d1a9b892..dcd5cd7f6b9c7 100644 --- a/std/tests/run-time-detect.rs +++ b/std/tests/run-time-detect.rs @@ -82,7 +82,20 @@ fn aarch64_linux() { println!("sha2: {}", is_aarch64_feature_detected!("sha2")); println!("sha3: {}", is_aarch64_feature_detected!("sha3")); println!("sm4: {}", is_aarch64_feature_detected!("sm4")); + println!("sme-f16f16: {}", is_aarch64_feature_detected!("sme-f16f16")); + println!("sme-f64f64: {}", is_aarch64_feature_detected!("sme-f64f64")); + println!("sme-f8f16: {}", is_aarch64_feature_detected!("sme-f8f16")); + println!("sme-f8f32: {}", is_aarch64_feature_detected!("sme-f8f32")); + println!("sme-fa64: {}", is_aarch64_feature_detected!("sme-fa64")); + println!("sme-i16i64: {}", is_aarch64_feature_detected!("sme-i16i64")); + println!("sme-lutv2: {}", is_aarch64_feature_detected!("sme-lutv2")); + println!("sme2: {}", is_aarch64_feature_detected!("sme2")); + println!("sme2p1: {}", is_aarch64_feature_detected!("sme2p1")); + println!("sme: {}", is_aarch64_feature_detected!("sme")); println!("ssbs: {}", is_aarch64_feature_detected!("ssbs")); + println!("ssve-fp8dot2: {}", is_aarch64_feature_detected!("ssve-fp8dot2")); + println!("ssve-fp8dot4: {}", is_aarch64_feature_detected!("ssve-fp8dot4")); + println!("ssve-fp8fma: {}", is_aarch64_feature_detected!("ssve-fp8fma")); println!("sve-b16b16: {}", is_aarch64_feature_detected!("sve-b16b16")); println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes")); println!("sve2-bitperm: {}", is_aarch64_feature_detected!("sve2-bitperm")); From 9876bd1014d4b3cf85a30214bca36a049d5c89c2 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 21 Aug 2024 22:22:18 -0700 Subject: [PATCH 05/10] library: Stabilize new_uninit for Box, Rc, and Arc A partial stabilization that only affects: - AllocType::new_uninit - AllocType::assume_init - AllocType<[T]>::new_uninit_slice - AllocType<[T]>::assume_init where "AllocType" is Box, Rc, or Arc --- alloc/src/boxed.rs | 43 ++++++++++++++++--------------------------- alloc/src/lib.rs | 1 - alloc/src/rc.rs | 26 ++++++++------------------ alloc/src/sync.rs | 26 ++++++++------------------ alloc/tests/lib.rs | 1 - proc_macro/src/lib.rs | 1 - std/src/lib.rs | 1 - 7 files changed, 32 insertions(+), 67 deletions(-) diff --git a/alloc/src/boxed.rs b/alloc/src/boxed.rs index caaf37f0465ed..38b1766c17440 100644 --- a/alloc/src/boxed.rs +++ b/alloc/src/boxed.rs @@ -262,8 +262,6 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(new_uninit)] - /// /// let mut five = Box::::new_uninit(); /// /// let five = unsafe { @@ -276,7 +274,7 @@ impl Box { /// assert_eq!(*five, 5) /// ``` #[cfg(not(no_global_oom_handling))] - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use] #[inline] pub fn new_uninit() -> Box> { @@ -292,7 +290,6 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(new_zeroed_alloc)] /// /// let zero = Box::::new_zeroed(); @@ -350,7 +347,7 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// let mut five = Box::::try_new_uninit()?; /// @@ -380,7 +377,7 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// let zero = Box::::try_new_zeroed()?; /// let zero = unsafe { zero.assume_init() }; @@ -460,7 +457,7 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -498,7 +495,7 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -538,7 +535,7 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -576,7 +573,7 @@ impl Box { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -654,8 +651,6 @@ impl Box<[T]> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] - /// /// let mut values = Box::<[u32]>::new_uninit_slice(3); /// /// let values = unsafe { @@ -670,7 +665,7 @@ impl Box<[T]> { /// assert_eq!(*values, [1, 2, 3]) /// ``` #[cfg(not(no_global_oom_handling))] - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use] pub fn new_uninit_slice(len: usize) -> Box<[mem::MaybeUninit]> { unsafe { RawVec::with_capacity(len).into_box(len) } @@ -686,7 +681,6 @@ impl Box<[T]> { /// /// ``` /// #![feature(new_zeroed_alloc)] - /// #![feature(new_uninit)] /// /// let values = Box::<[u32]>::new_zeroed_slice(3); /// let values = unsafe { values.assume_init() }; @@ -708,7 +702,7 @@ impl Box<[T]> { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// let mut values = Box::<[u32]>::try_new_uninit_slice(3)?; /// let values = unsafe { @@ -746,7 +740,7 @@ impl Box<[T]> { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// let values = Box::<[u32]>::try_new_zeroed_slice(3)?; /// let values = unsafe { values.assume_init() }; @@ -778,7 +772,7 @@ impl Box<[T], A> { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -812,7 +806,7 @@ impl Box<[T], A> { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -837,7 +831,7 @@ impl Box<[T], A> { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -880,7 +874,7 @@ impl Box<[T], A> { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::alloc::System; /// @@ -927,8 +921,6 @@ impl Box, A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] - /// /// let mut five = Box::::new_uninit(); /// /// let five: Box = unsafe { @@ -940,7 +932,7 @@ impl Box, A> { /// /// assert_eq!(*five, 5) /// ``` - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[inline] pub unsafe fn assume_init(self) -> Box { let (raw, alloc) = Box::into_raw_with_allocator(self); @@ -958,7 +950,6 @@ impl Box, A> { /// /// ``` /// #![feature(box_uninit_write)] - /// #![feature(new_uninit)] /// /// let big_box = Box::<[usize; 1024]>::new_uninit(); /// @@ -1001,8 +992,6 @@ impl Box<[mem::MaybeUninit], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] - /// /// let mut values = Box::<[u32]>::new_uninit_slice(3); /// /// let values = unsafe { @@ -1016,7 +1005,7 @@ impl Box<[mem::MaybeUninit], A> { /// /// assert_eq!(*values, [1, 2, 3]) /// ``` - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[inline] pub unsafe fn assume_init(self) -> Box<[T], A> { let (raw, alloc) = Box::into_raw_with_allocator(self); diff --git a/alloc/src/lib.rs b/alloc/src/lib.rs index 5e4b08df6cb55..c459a8da820f6 100644 --- a/alloc/src/lib.rs +++ b/alloc/src/lib.rs @@ -93,7 +93,6 @@ // tidy-alphabetical-start #![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))] #![cfg_attr(not(no_global_oom_handling), feature(const_btree_len))] -#![cfg_attr(test, feature(new_uninit))] #![feature(alloc_layout_extra)] #![feature(allocator_api)] #![feature(array_chunks)] diff --git a/alloc/src/rc.rs b/alloc/src/rc.rs index f153aa6d3be9a..1b31a78394eca 100644 --- a/alloc/src/rc.rs +++ b/alloc/src/rc.rs @@ -503,7 +503,6 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::rc::Rc; @@ -518,7 +517,7 @@ impl Rc { /// assert_eq!(*five, 5) /// ``` #[cfg(not(no_global_oom_handling))] - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use] pub fn new_uninit() -> Rc> { unsafe { @@ -540,7 +539,6 @@ impl Rc { /// /// ``` /// #![feature(new_zeroed_alloc)] - /// #![feature(new_uninit)] /// /// use std::rc::Rc; /// @@ -594,7 +592,7 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// #![feature(get_mut_unchecked)] /// /// use std::rc::Rc; @@ -630,7 +628,7 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::rc::Rc; /// @@ -692,7 +690,6 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// #![feature(allocator_api)] /// @@ -736,7 +733,6 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(allocator_api)] /// /// use std::rc::Rc; @@ -799,7 +795,7 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// #![feature(get_mut_unchecked)] /// /// use std::rc::Rc; @@ -843,7 +839,7 @@ impl Rc { /// # Examples /// /// ``` - /// #![feature(allocator_api, new_uninit)] + /// #![feature(allocator_api)] /// /// use std::rc::Rc; /// use std::alloc::System; @@ -967,7 +963,6 @@ impl Rc<[T]> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::rc::Rc; @@ -985,7 +980,7 @@ impl Rc<[T]> { /// assert_eq!(*values, [1, 2, 3]) /// ``` #[cfg(not(no_global_oom_handling))] - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use] pub fn new_uninit_slice(len: usize) -> Rc<[mem::MaybeUninit]> { unsafe { Rc::from_ptr(Rc::allocate_for_slice(len)) } @@ -1000,7 +995,6 @@ impl Rc<[T]> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(new_zeroed_alloc)] /// /// use std::rc::Rc; @@ -1035,7 +1029,6 @@ impl Rc<[T], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// #![feature(allocator_api)] /// @@ -1072,7 +1065,6 @@ impl Rc<[T], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(allocator_api)] /// /// use std::rc::Rc; @@ -1122,7 +1114,6 @@ impl Rc, A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::rc::Rc; @@ -1136,7 +1127,7 @@ impl Rc, A> { /// /// assert_eq!(*five, 5) /// ``` - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[inline] pub unsafe fn assume_init(self) -> Rc { let (ptr, alloc) = Rc::into_inner_with_allocator(self); @@ -1160,7 +1151,6 @@ impl Rc<[mem::MaybeUninit], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::rc::Rc; @@ -1177,7 +1167,7 @@ impl Rc<[mem::MaybeUninit], A> { /// /// assert_eq!(*values, [1, 2, 3]) /// ``` - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[inline] pub unsafe fn assume_init(self) -> Rc<[T], A> { let (ptr, alloc) = Rc::into_inner_with_allocator(self); diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs index 4a3522f1a641b..aac768eb0535a 100644 --- a/alloc/src/sync.rs +++ b/alloc/src/sync.rs @@ -505,7 +505,6 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::sync::Arc; @@ -521,7 +520,7 @@ impl Arc { /// ``` #[cfg(not(no_global_oom_handling))] #[inline] - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use] pub fn new_uninit() -> Arc> { unsafe { @@ -543,7 +542,6 @@ impl Arc { /// /// ``` /// #![feature(new_zeroed_alloc)] - /// #![feature(new_uninit)] /// /// use std::sync::Arc; /// @@ -614,7 +612,7 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit, allocator_api)] + /// #![feature(allocator_api)] /// #![feature(get_mut_unchecked)] /// /// use std::sync::Arc; @@ -650,7 +648,7 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit, allocator_api)] + /// #![feature( allocator_api)] /// /// use std::sync::Arc; /// @@ -711,7 +709,6 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// #![feature(allocator_api)] /// @@ -755,7 +752,6 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(allocator_api)] /// /// use std::sync::Arc; @@ -845,7 +841,7 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit, allocator_api)] + /// #![feature(allocator_api)] /// #![feature(get_mut_unchecked)] /// /// use std::sync::Arc; @@ -889,7 +885,7 @@ impl Arc { /// # Examples /// /// ``` - /// #![feature(new_uninit, allocator_api)] + /// #![feature(allocator_api)] /// /// use std::sync::Arc; /// use std::alloc::System; @@ -1101,7 +1097,6 @@ impl Arc<[T]> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::sync::Arc; @@ -1120,7 +1115,7 @@ impl Arc<[T]> { /// ``` #[cfg(not(no_global_oom_handling))] #[inline] - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use] pub fn new_uninit_slice(len: usize) -> Arc<[mem::MaybeUninit]> { unsafe { Arc::from_ptr(Arc::allocate_for_slice(len)) } @@ -1136,7 +1131,6 @@ impl Arc<[T]> { /// /// ``` /// #![feature(new_zeroed_alloc)] - /// #![feature(new_uninit)] /// /// use std::sync::Arc; /// @@ -1172,7 +1166,6 @@ impl Arc<[T], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// #![feature(allocator_api)] /// @@ -1208,7 +1201,6 @@ impl Arc<[T], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(allocator_api)] /// /// use std::sync::Arc; @@ -1257,7 +1249,6 @@ impl Arc, A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::sync::Arc; @@ -1271,7 +1262,7 @@ impl Arc, A> { /// /// assert_eq!(*five, 5) /// ``` - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use = "`self` will be dropped if the result is not used"] #[inline] pub unsafe fn assume_init(self) -> Arc { @@ -1296,7 +1287,6 @@ impl Arc<[mem::MaybeUninit], A> { /// # Examples /// /// ``` - /// #![feature(new_uninit)] /// #![feature(get_mut_unchecked)] /// /// use std::sync::Arc; @@ -1313,7 +1303,7 @@ impl Arc<[mem::MaybeUninit], A> { /// /// assert_eq!(*values, [1, 2, 3]) /// ``` - #[unstable(feature = "new_uninit", issue = "63291")] + #[stable(feature = "new_uninit", since = "CURRENT_RUSTC_VERSION")] #[must_use = "`self` will be dropped if the result is not used"] #[inline] pub unsafe fn assume_init(self) -> Arc<[T], A> { diff --git a/alloc/tests/lib.rs b/alloc/tests/lib.rs index 3d4add6fae452..c5c6a122cfec8 100644 --- a/alloc/tests/lib.rs +++ b/alloc/tests/lib.rs @@ -15,7 +15,6 @@ #![feature(exact_size_is_empty)] #![feature(linked_list_cursors)] #![feature(map_try_insert)] -#![feature(new_uninit)] #![feature(pattern)] #![feature(trusted_len)] #![feature(try_reserve_kind)] diff --git a/proc_macro/src/lib.rs b/proc_macro/src/lib.rs index c271ac1870624..72b53c60f7439 100644 --- a/proc_macro/src/lib.rs +++ b/proc_macro/src/lib.rs @@ -28,7 +28,6 @@ #![feature(decl_macro)] #![feature(maybe_uninit_write_slice)] #![feature(negative_impls)] -#![feature(new_uninit)] #![feature(panic_can_unwind)] #![feature(restricted_std)] #![feature(rustc_attrs)] diff --git a/std/src/lib.rs b/std/src/lib.rs index f65e9bc8d8b5a..bea8eda261968 100644 --- a/std/src/lib.rs +++ b/std/src/lib.rs @@ -362,7 +362,6 @@ #![feature(allocator_api)] #![feature(get_mut_unchecked)] #![feature(map_try_insert)] -#![feature(new_uninit)] #![feature(new_zeroed_alloc)] #![feature(slice_concat_trait)] #![feature(thin_box)] From 172989811592864fc5e8450c240cd90796c4f119 Mon Sep 17 00:00:00 2001 From: Matthew Giordano Date: Tue, 27 Aug 2024 17:38:51 -0700 Subject: [PATCH 06/10] Add fmt::Debug to sync::Weak --- alloc/src/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alloc/src/sync.rs b/alloc/src/sync.rs index 4a3522f1a641b..1948d3e10c53e 100644 --- a/alloc/src/sync.rs +++ b/alloc/src/sync.rs @@ -335,7 +335,7 @@ impl, U: ?Sized, A: Allocator> CoerceUnsized> f impl, U: ?Sized> DispatchFromDyn> for Weak {} #[stable(feature = "arc_weak", since = "1.4.0")] -impl fmt::Debug for Weak { +impl fmt::Debug for Weak { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "(Weak)") } From 86c924ff3e541cb01ae08e780d0e8ab070565332 Mon Sep 17 00:00:00 2001 From: Kornel Date: Sun, 14 Apr 2024 13:52:58 +0100 Subject: [PATCH 07/10] fmt-debug option Allows disabling `fmt::Debug` derive and debug formatting. --- core/src/fmt/rt.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/fmt/rt.rs b/core/src/fmt/rt.rs index 65a4d537cc74d..f29ac99b29255 100644 --- a/core/src/fmt/rt.rs +++ b/core/src/fmt/rt.rs @@ -118,6 +118,10 @@ impl<'a> Argument<'a> { Self::new(x, Debug::fmt) } #[inline(always)] + pub fn new_debug_noop<'b, T: Debug>(x: &'b T) -> Argument<'_> { + Self::new(x, |_, _| Ok(())) + } + #[inline(always)] pub fn new_octal<'b, T: Octal>(x: &'b T) -> Argument<'_> { Self::new(x, Octal::fmt) } From 60fd9c980c3341963bdd8bb69992c0fcc401096f Mon Sep 17 00:00:00 2001 From: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:38:19 +0800 Subject: [PATCH 08/10] Update `compiler_builtins` to `0.1.123` Signed-off-by: Amjad Alsharafi <26300843+Amjad50@users.noreply.github.com> --- Cargo.lock | 4 ++-- alloc/Cargo.toml | 2 +- std/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa22181a4639a..54ad052c52322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,9 +58,9 @@ dependencies = [ [[package]] name = "compiler_builtins" -version = "0.1.121" +version = "0.1.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce956e6dc07082ec481f0935a51e83b343f8ca51be560452c0ebf830d0bdf5a5" +checksum = "b47fcbecb558bdad78c7d3a998523c60a50dd6cd046d5fe74163e309e878fff7" dependencies = [ "cc", "rustc-std-workspace-core", diff --git a/alloc/Cargo.toml b/alloc/Cargo.toml index a39a0a6ce0ea2..4365bcc4ad022 100644 --- a/alloc/Cargo.toml +++ b/alloc/Cargo.toml @@ -10,7 +10,7 @@ edition = "2021" [dependencies] core = { path = "../core" } -compiler_builtins = { version = "0.1.121", features = ['rustc-dep-of-std'] } +compiler_builtins = { version = "0.1.123", features = ['rustc-dep-of-std'] } [dev-dependencies] rand = { version = "0.8.5", default-features = false, features = ["alloc"] } diff --git a/std/Cargo.toml b/std/Cargo.toml index 334c75df2315a..5f0144922ca69 100644 --- a/std/Cargo.toml +++ b/std/Cargo.toml @@ -17,7 +17,7 @@ cfg-if = { version = "1.0", features = ['rustc-dep-of-std'] } panic_unwind = { path = "../panic_unwind", optional = true } panic_abort = { path = "../panic_abort" } core = { path = "../core", public = true } -compiler_builtins = { version = "0.1.121" } +compiler_builtins = { version = "0.1.123" } profiler_builtins = { path = "../profiler_builtins", optional = true } unwind = { path = "../unwind" } hashbrown = { version = "0.14", default-features = false, features = [ From 21396512b94cae50e696bb0093a2fad804d63f36 Mon Sep 17 00:00:00 2001 From: "Chai T. Rex" Date: Mon, 26 Aug 2024 01:34:25 -0400 Subject: [PATCH 09/10] Improve `isqrt` tests and add benchmarks * Choose test inputs more thoroughly and systematically. * Check that `isqrt` and `checked_isqrt` have equivalent results for signed types, either equivalent numerically or equivalent as a panic and a `None`. * Check that `isqrt` has numerically-equivalent results for unsigned types and their `NonZero` counterparts. * Reuse `ilog10` benchmarks, plus benchmarks that use a uniform distribution. --- core/benches/lib.rs | 1 + core/benches/num/int_sqrt/mod.rs | 62 ++++++++ core/benches/num/mod.rs | 1 + core/tests/num/int_macros.rs | 32 ---- core/tests/num/int_sqrt.rs | 248 +++++++++++++++++++++++++++++++ core/tests/num/mod.rs | 1 + 6 files changed, 313 insertions(+), 32 deletions(-) create mode 100644 core/benches/num/int_sqrt/mod.rs create mode 100644 core/tests/num/int_sqrt.rs diff --git a/core/benches/lib.rs b/core/benches/lib.rs index 32d15c386cb1b..3f1c58bbd7204 100644 --- a/core/benches/lib.rs +++ b/core/benches/lib.rs @@ -8,6 +8,7 @@ #![feature(iter_array_chunks)] #![feature(iter_next_chunk)] #![feature(iter_advance_by)] +#![feature(isqrt)] extern crate test; diff --git a/core/benches/num/int_sqrt/mod.rs b/core/benches/num/int_sqrt/mod.rs new file mode 100644 index 0000000000000..3c9d173e456a1 --- /dev/null +++ b/core/benches/num/int_sqrt/mod.rs @@ -0,0 +1,62 @@ +use rand::Rng; +use test::{black_box, Bencher}; + +macro_rules! int_sqrt_bench { + ($t:ty, $predictable:ident, $random:ident, $random_small:ident, $random_uniform:ident) => { + #[bench] + fn $predictable(bench: &mut Bencher) { + bench.iter(|| { + for n in 0..(<$t>::BITS / 8) { + for i in 1..=(100 as $t) { + let x = black_box(i << (n * 8)); + black_box(x.isqrt()); + } + } + }); + } + + #[bench] + fn $random(bench: &mut Bencher) { + let mut rng = crate::bench_rng(); + /* Exponentially distributed random numbers from the whole range of the type. */ + let numbers: Vec<$t> = + (0..256).map(|_| rng.gen::<$t>() >> rng.gen_range(0..<$t>::BITS)).collect(); + bench.iter(|| { + for x in &numbers { + black_box(black_box(x).isqrt()); + } + }); + } + + #[bench] + fn $random_small(bench: &mut Bencher) { + let mut rng = crate::bench_rng(); + /* Exponentially distributed random numbers from the range 0..256. */ + let numbers: Vec<$t> = + (0..256).map(|_| (rng.gen::() >> rng.gen_range(0..u8::BITS)) as $t).collect(); + bench.iter(|| { + for x in &numbers { + black_box(black_box(x).isqrt()); + } + }); + } + + #[bench] + fn $random_uniform(bench: &mut Bencher) { + let mut rng = crate::bench_rng(); + /* Exponentially distributed random numbers from the whole range of the type. */ + let numbers: Vec<$t> = (0..256).map(|_| rng.gen::<$t>()).collect(); + bench.iter(|| { + for x in &numbers { + black_box(black_box(x).isqrt()); + } + }); + } + }; +} + +int_sqrt_bench! {u8, u8_sqrt_predictable, u8_sqrt_random, u8_sqrt_random_small, u8_sqrt_uniform} +int_sqrt_bench! {u16, u16_sqrt_predictable, u16_sqrt_random, u16_sqrt_random_small, u16_sqrt_uniform} +int_sqrt_bench! {u32, u32_sqrt_predictable, u32_sqrt_random, u32_sqrt_random_small, u32_sqrt_uniform} +int_sqrt_bench! {u64, u64_sqrt_predictable, u64_sqrt_random, u64_sqrt_random_small, u64_sqrt_uniform} +int_sqrt_bench! {u128, u128_sqrt_predictable, u128_sqrt_random, u128_sqrt_random_small, u128_sqrt_uniform} diff --git a/core/benches/num/mod.rs b/core/benches/num/mod.rs index c1dc3a3062256..7ff7443cfa7fe 100644 --- a/core/benches/num/mod.rs +++ b/core/benches/num/mod.rs @@ -2,6 +2,7 @@ mod dec2flt; mod flt2dec; mod int_log; mod int_pow; +mod int_sqrt; use std::str::FromStr; diff --git a/core/tests/num/int_macros.rs b/core/tests/num/int_macros.rs index 7cd3b54e3f39a..830a96204ca03 100644 --- a/core/tests/num/int_macros.rs +++ b/core/tests/num/int_macros.rs @@ -288,38 +288,6 @@ macro_rules! int_module { assert_eq!(r.saturating_pow(0), 1 as $T); } - #[test] - fn test_isqrt() { - assert_eq!($T::MIN.checked_isqrt(), None); - assert_eq!((-1 as $T).checked_isqrt(), None); - assert_eq!((0 as $T).isqrt(), 0 as $T); - assert_eq!((1 as $T).isqrt(), 1 as $T); - assert_eq!((2 as $T).isqrt(), 1 as $T); - assert_eq!((99 as $T).isqrt(), 9 as $T); - assert_eq!((100 as $T).isqrt(), 10 as $T); - } - - #[cfg(not(miri))] // Miri is too slow - #[test] - fn test_lots_of_isqrt() { - let n_max: $T = (1024 * 1024).min($T::MAX as u128) as $T; - for n in 0..=n_max { - let isqrt: $T = n.isqrt(); - - assert!(isqrt.pow(2) <= n); - let (square, overflow) = (isqrt + 1).overflowing_pow(2); - assert!(overflow || square > n); - } - - for n in ($T::MAX - 127)..=$T::MAX { - let isqrt: $T = n.isqrt(); - - assert!(isqrt.pow(2) <= n); - let (square, overflow) = (isqrt + 1).overflowing_pow(2); - assert!(overflow || square > n); - } - } - #[test] fn test_div_floor() { let a: $T = 8; diff --git a/core/tests/num/int_sqrt.rs b/core/tests/num/int_sqrt.rs new file mode 100644 index 0000000000000..d68db0787d22c --- /dev/null +++ b/core/tests/num/int_sqrt.rs @@ -0,0 +1,248 @@ +macro_rules! tests { + ($isqrt_consistency_check_fn_macro:ident : $($T:ident)+) => { + $( + mod $T { + $isqrt_consistency_check_fn_macro!($T); + + // Check that the following produce the correct values from + // `isqrt`: + // + // * the first and last 128 nonnegative values + // * powers of two, minus one + // * powers of two + // + // For signed types, check that `checked_isqrt` and `isqrt` + // either produce the same numeric value or respectively + // produce `None` and a panic. Make sure to do a consistency + // check for `<$T>::MIN` as well, as no nonnegative values + // negate to it. + // + // For unsigned types check that `isqrt` produces the same + // numeric value for `$T` and `NonZero<$T>`. + #[test] + fn isqrt() { + isqrt_consistency_check(<$T>::MIN); + + for n in (0..=127) + .chain(<$T>::MAX - 127..=<$T>::MAX) + .chain((0..<$T>::MAX.count_ones()).map(|exponent| (1 << exponent) - 1)) + .chain((0..<$T>::MAX.count_ones()).map(|exponent| 1 << exponent)) + { + isqrt_consistency_check(n); + + let isqrt_n = n.isqrt(); + assert!( + isqrt_n + .checked_mul(isqrt_n) + .map(|isqrt_n_squared| isqrt_n_squared <= n) + .unwrap_or(false), + "`{n}.isqrt()` should be lower than {isqrt_n}." + ); + assert!( + (isqrt_n + 1) + .checked_mul(isqrt_n + 1) + .map(|isqrt_n_plus_1_squared| n < isqrt_n_plus_1_squared) + .unwrap_or(true), + "`{n}.isqrt()` should be higher than {isqrt_n})." + ); + } + } + + // Check the square roots of: + // + // * the first 1,024 perfect squares + // * halfway between each of the first 1,024 perfect squares + // and the next perfect square + // * the next perfect square after the each of the first 1,024 + // perfect squares, minus one + // * the last 1,024 perfect squares + // * the last 1,024 perfect squares, minus one + // * halfway between each of the last 1,024 perfect squares + // and the previous perfect square + #[test] + // Skip this test on Miri, as it takes too long to run. + #[cfg(not(miri))] + fn isqrt_extended() { + // The correct value is worked out by using the fact that + // the nth nonzero perfect square is the sum of the first n + // odd numbers: + // + // 1 = 1 + // 4 = 1 + 3 + // 9 = 1 + 3 + 5 + // 16 = 1 + 3 + 5 + 7 + // + // Note also that the last odd number added in is two times + // the square root of the previous perfect square, plus + // one: + // + // 1 = 2*0 + 1 + // 3 = 2*1 + 1 + // 5 = 2*2 + 1 + // 7 = 2*3 + 1 + // + // That means we can add the square root of this perfect + // square once to get about halfway to the next perfect + // square, then we can add the square root of this perfect + // square again to get to the next perfect square, minus + // one, then we can add one to get to the next perfect + // square. + // + // This allows us to, for each of the first 1,024 perfect + // squares, test that the square roots of the following are + // all correct and equal to each other: + // + // * the current perfect square + // * about halfway to the next perfect square + // * the next perfect square, minus one + let mut n: $T = 0; + for sqrt_n in 0..1_024.min((1_u128 << (<$T>::MAX.count_ones()/2)) - 1) as $T { + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "`{sqrt_n}.pow(2).isqrt()` should be {sqrt_n}." + ); + + n += sqrt_n; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "{n} is about halfway between `{sqrt_n}.pow(2)` and `{}.pow(2)`, so `{n}.isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n += sqrt_n; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "`({}.pow(2) - 1).isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n += 1; + } + + // Similarly, for each of the last 1,024 perfect squares, + // check: + // + // * the current perfect square + // * the current perfect square, minus one + // * about halfway to the previous perfect square + // + // `MAX`'s `isqrt` return value is verified in the `isqrt` + // test function above. + let maximum_sqrt = <$T>::MAX.isqrt(); + let mut n = maximum_sqrt * maximum_sqrt; + + for sqrt_n in (maximum_sqrt - 1_024.min((1_u128 << (<$T>::MAX.count_ones()/2)) - 1) as $T..maximum_sqrt).rev() { + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n + 1, + "`{0}.pow(2).isqrt()` should be {0}.", + sqrt_n + 1 + ); + + n -= 1; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "`({}.pow(2) - 1).isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n -= sqrt_n; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "{n} is about halfway between `{sqrt_n}.pow(2)` and `{}.pow(2)`, so `{n}.isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n -= sqrt_n; + } + } + } + )* + }; +} + +macro_rules! signed_check { + ($T:ident) => { + /// This takes an input and, if it's nonnegative or + #[doc = concat!("`", stringify!($T), "::MIN`,")] + /// checks that `isqrt` and `checked_isqrt` produce equivalent results + /// for that input and for the negative of that input. + /// + /// # Note + /// + /// This cannot check that negative inputs to `isqrt` cause panics if + /// panics abort instead of unwind. + fn isqrt_consistency_check(n: $T) { + // `<$T>::MIN` will be negative, so ignore it in this nonnegative + // section. + if n >= 0 { + assert_eq!( + Some(n.isqrt()), + n.checked_isqrt(), + "`{n}.checked_isqrt()` should match `Some({n}.isqrt())`.", + ); + } + + // `wrapping_neg` so that `<$T>::MIN` will negate to itself rather + // than panicking. + let negative_n = n.wrapping_neg(); + + // Zero negated will still be nonnegative, so ignore it in this + // negative section. + if negative_n < 0 { + assert_eq!( + negative_n.checked_isqrt(), + None, + "`({negative_n}).checked_isqrt()` should be `None`, as {negative_n} is negative.", + ); + + // `catch_unwind` only works when panics unwind rather than abort. + #[cfg(panic = "unwind")] + { + std::panic::catch_unwind(core::panic::AssertUnwindSafe(|| (-n).isqrt())).expect_err( + &format!("`({negative_n}).isqrt()` should have panicked, as {negative_n} is negative.") + ); + } + } + } + }; +} + +macro_rules! unsigned_check { + ($T:ident) => { + /// This takes an input and, if it's nonzero, checks that `isqrt` + /// produces the same numeric value for both + #[doc = concat!("`", stringify!($T), "` and ")] + #[doc = concat!("`NonZero<", stringify!($T), ">`.")] + fn isqrt_consistency_check(n: $T) { + // Zero cannot be turned into a `NonZero` value, so ignore it in + // this nonzero section. + if n > 0 { + assert_eq!( + n.isqrt(), + core::num::NonZero::<$T>::new(n) + .expect( + "Was not able to create a new `NonZero` value from a nonzero number." + ) + .isqrt() + .get(), + "`{n}.isqrt` should match `NonZero`'s `{n}.isqrt().get()`.", + ); + } + } + }; +} + +tests!(signed_check: i8 i16 i32 i64 i128); +tests!(unsigned_check: u8 u16 u32 u64 u128); diff --git a/core/tests/num/mod.rs b/core/tests/num/mod.rs index dad46ad88fe19..b14fe0b22c311 100644 --- a/core/tests/num/mod.rs +++ b/core/tests/num/mod.rs @@ -27,6 +27,7 @@ mod const_from; mod dec2flt; mod flt2dec; mod int_log; +mod int_sqrt; mod ops; mod wrapping; From c44af617f84c6938404624516036846d28e70df6 Mon Sep 17 00:00:00 2001 From: "Chai T. Rex" Date: Mon, 26 Aug 2024 01:36:25 -0400 Subject: [PATCH 10/10] Speed up `checked_isqrt` and `isqrt` methods * Use a lookup table for 8-bit integers and the Karatsuba square root algorithm for larger integers. * Include optimization hints that give the compiler the exact numeric range of results. --- core/src/num/int_macros.rs | 36 +++- core/src/num/int_sqrt.rs | 316 ++++++++++++++++++++++++++++++++++++ core/src/num/mod.rs | 1 + core/src/num/nonzero.rs | 33 +--- core/src/num/uint_macros.rs | 20 ++- 5 files changed, 371 insertions(+), 35 deletions(-) create mode 100644 core/src/num/int_sqrt.rs diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs index 229cd89bea3f0..f3d38b25ab4fe 100644 --- a/core/src/num/int_macros.rs +++ b/core/src/num/int_macros.rs @@ -1641,7 +1641,33 @@ macro_rules! int_impl { if self < 0 { None } else { - Some((self as $UnsignedT).isqrt() as Self) + // SAFETY: Input is nonnegative in this `else` branch. + let result = unsafe { + crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT + }; + + // Inform the optimizer what the range of outputs is. If + // testing `core` crashes with no panic message and a + // `num::int_sqrt::i*` test failed, it's because your edits + // caused these assertions to become false. + // + // SAFETY: Integer square root is a monotonically nondecreasing + // function, which means that increasing the input will never + // cause the output to decrease. Thus, since the input for + // nonnegative signed integers is bounded by + // `[0, <$ActualT>::MAX]`, sqrt(n) will be bounded by + // `[sqrt(0), sqrt(<$ActualT>::MAX)]`. + unsafe { + // SAFETY: `<$ActualT>::MAX` is nonnegative. + const MAX_RESULT: $SelfT = unsafe { + crate::num::int_sqrt::$ActualT(<$ActualT>::MAX) as $SelfT + }; + + crate::hint::assert_unchecked(result >= 0); + crate::hint::assert_unchecked(result <= MAX_RESULT); + } + + Some(result) } } @@ -2862,15 +2888,11 @@ macro_rules! int_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] + #[track_caller] pub const fn isqrt(self) -> Self { - // I would like to implement it as - // ``` - // self.checked_isqrt().expect("argument of integer square root must be non-negative") - // ``` - // but `expect` is not yet stable as a `const fn`. match self.checked_isqrt() { Some(sqrt) => sqrt, - None => panic!("argument of integer square root must be non-negative"), + None => crate::num::int_sqrt::panic_for_negative_argument(), } } diff --git a/core/src/num/int_sqrt.rs b/core/src/num/int_sqrt.rs new file mode 100644 index 0000000000000..601e81f69930f --- /dev/null +++ b/core/src/num/int_sqrt.rs @@ -0,0 +1,316 @@ +//! These functions use the [Karatsuba square root algorithm][1] to compute the +//! [integer square root](https://en.wikipedia.org/wiki/Integer_square_root) +//! for the primitive integer types. +//! +//! The signed integer functions can only handle **nonnegative** inputs, so +//! that must be checked before calling those. +//! +//! [1]: +//! "Paul Zimmermann. Karatsuba Square Root. \[Research Report\] RR-3805, +//! INRIA. 1999, pp.8. (inria-00072854)" + +/// This array stores the [integer square roots]( +/// https://en.wikipedia.org/wiki/Integer_square_root) and remainders of each +/// [`u8`](prim@u8) value. For example, `U8_ISQRT_WITH_REMAINDER[17]` will be +/// `(4, 1)` because the integer square root of 17 is 4 and because 17 is 1 +/// higher than 4 squared. +const U8_ISQRT_WITH_REMAINDER: [(u8, u8); 256] = { + let mut result = [(0, 0); 256]; + + let mut n: usize = 0; + let mut isqrt_n: usize = 0; + while n < result.len() { + result[n] = (isqrt_n as u8, (n - isqrt_n.pow(2)) as u8); + + n += 1; + if n == (isqrt_n + 1).pow(2) { + isqrt_n += 1; + } + } + + result +}; + +/// Returns the [integer square root]( +/// https://en.wikipedia.org/wiki/Integer_square_root) of any [`u8`](prim@u8) +/// input. +#[must_use = "this returns the result of the operation, \ + without modifying the original"] +#[inline] +pub const fn u8(n: u8) -> u8 { + U8_ISQRT_WITH_REMAINDER[n as usize].0 +} + +/// Generates an `i*` function that returns the [integer square root]( +/// https://en.wikipedia.org/wiki/Integer_square_root) of any **nonnegative** +/// input of a specific signed integer type. +macro_rules! signed_fn { + ($SignedT:ident, $UnsignedT:ident) => { + /// Returns the [integer square root]( + /// https://en.wikipedia.org/wiki/Integer_square_root) of any + /// **nonnegative** + #[doc = concat!("[`", stringify!($SignedT), "`](prim@", stringify!($SignedT), ")")] + /// input. + /// + /// # Safety + /// + /// This results in undefined behavior when the input is negative. + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub const unsafe fn $SignedT(n: $SignedT) -> $SignedT { + debug_assert!(n >= 0, "Negative input inside `isqrt`."); + $UnsignedT(n as $UnsignedT) as $SignedT + } + }; +} + +signed_fn!(i8, u8); +signed_fn!(i16, u16); +signed_fn!(i32, u32); +signed_fn!(i64, u64); +signed_fn!(i128, u128); + +/// Generates a `u*` function that returns the [integer square root]( +/// https://en.wikipedia.org/wiki/Integer_square_root) of any input of +/// a specific unsigned integer type. +macro_rules! unsigned_fn { + ($UnsignedT:ident, $HalfBitsT:ident, $stages:ident) => { + /// Returns the [integer square root]( + /// https://en.wikipedia.org/wiki/Integer_square_root) of any + #[doc = concat!("[`", stringify!($UnsignedT), "`](prim@", stringify!($UnsignedT), ")")] + /// input. + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub const fn $UnsignedT(mut n: $UnsignedT) -> $UnsignedT { + if n <= <$HalfBitsT>::MAX as $UnsignedT { + $HalfBitsT(n as $HalfBitsT) as $UnsignedT + } else { + // The normalization shift satisfies the Karatsuba square root + // algorithm precondition "a₃ ≥ b/4" where a₃ is the most + // significant quarter of `n`'s bits and b is the number of + // values that can be represented by that quarter of the bits. + // + // b/4 would then be all 0s except the second most significant + // bit (010...0) in binary. Since a₃ must be at least b/4, a₃'s + // most significant bit or its neighbor must be a 1. Since a₃'s + // most significant bits are `n`'s most significant bits, the + // same applies to `n`. + // + // The reason to shift by an even number of bits is because an + // even number of bits produces the square root shifted to the + // left by half of the normalization shift: + // + // sqrt(n << (2 * p)) + // sqrt(2.pow(2 * p) * n) + // sqrt(2.pow(2 * p)) * sqrt(n) + // 2.pow(p) * sqrt(n) + // sqrt(n) << p + // + // Shifting by an odd number of bits leaves an ugly sqrt(2) + // multiplied in: + // + // sqrt(n << (2 * p + 1)) + // sqrt(2.pow(2 * p + 1) * n) + // sqrt(2 * 2.pow(2 * p) * n) + // sqrt(2) * sqrt(2.pow(2 * p)) * sqrt(n) + // sqrt(2) * 2.pow(p) * sqrt(n) + // sqrt(2) * (sqrt(n) << p) + const EVEN_MAKING_BITMASK: u32 = !1; + let normalization_shift = n.leading_zeros() & EVEN_MAKING_BITMASK; + n <<= normalization_shift; + + let s = $stages(n); + + let denormalization_shift = normalization_shift >> 1; + s >> denormalization_shift + } + } + }; +} + +/// Generates the first stage of the computation after normalization. +/// +/// # Safety +/// +/// `$n` must be nonzero. +macro_rules! first_stage { + ($original_bits:literal, $n:ident) => {{ + debug_assert!($n != 0, "`$n` is zero in `first_stage!`."); + + const N_SHIFT: u32 = $original_bits - 8; + let n = $n >> N_SHIFT; + + let (s, r) = U8_ISQRT_WITH_REMAINDER[n as usize]; + + // Inform the optimizer that `s` is nonzero. This will allow it to + // avoid generating code to handle division-by-zero panics in the next + // stage. + // + // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn` + // macro recurses instead of continuing to this point, so the original + // `$n` wasn't a 0 if we've reached here. + // + // Then the `unsigned_fn` macro normalizes `$n` so that at least one of + // its two most-significant bits is a 1. + // + // Then this stage puts the eight most-significant bits of `$n` into + // `n`. This means that `n` here has at least one 1 bit in its two + // most-significant bits, making `n` nonzero. + // + // `U8_ISQRT_WITH_REMAINDER[n as usize]` will give a nonzero `s` when + // given a nonzero `n`. + unsafe { crate::hint::assert_unchecked(s != 0) }; + (s, r) + }}; +} + +/// Generates a middle stage of the computation. +/// +/// # Safety +/// +/// `$s` must be nonzero. +macro_rules! middle_stage { + ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{ + debug_assert!($s != 0, "`$s` is zero in `middle_stage!`."); + + const N_SHIFT: u32 = $original_bits - <$ty>::BITS; + let n = ($n >> N_SHIFT) as $ty; + + const HALF_BITS: u32 = <$ty>::BITS >> 1; + const QUARTER_BITS: u32 = <$ty>::BITS >> 2; + const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1; + const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1; + + let lo = n & LOWER_HALF_1_BITS; + let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS); + let denominator = ($s as $ty) << 1; + let q = numerator / denominator; + let u = numerator % denominator; + + let mut s = ($s << QUARTER_BITS) as $ty + q; + let (mut r, overflow) = + ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q); + if overflow { + r = r.wrapping_add(2 * s - 1); + s -= 1; + } + + // Inform the optimizer that `s` is nonzero. This will allow it to + // avoid generating code to handle division-by-zero panics in the next + // stage. + // + // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn` + // macro recurses instead of continuing to this point, so the original + // `$n` wasn't a 0 if we've reached here. + // + // Then the `unsigned_fn` macro normalizes `$n` so that at least one of + // its two most-significant bits is a 1. + // + // Then these stages take as many of the most-significant bits of `$n` + // as will fit in this stage's type. For example, the stage that + // handles `u32` deals with the 32 most-significant bits of `$n`. This + // means that each stage has at least one 1 bit in `n`'s two + // most-significant bits, making `n` nonzero. + // + // Then this stage will produce the correct integer square root for + // that `n` value. Since `n` is nonzero, `s` will also be nonzero. + unsafe { crate::hint::assert_unchecked(s != 0) }; + (s, r) + }}; +} + +/// Generates the last stage of the computation before denormalization. +/// +/// # Safety +/// +/// `$s` must be nonzero. +macro_rules! last_stage { + ($ty:ty, $n:ident, $s:ident, $r:ident) => {{ + debug_assert!($s != 0, "`$s` is zero in `last_stage!`."); + + const HALF_BITS: u32 = <$ty>::BITS >> 1; + const QUARTER_BITS: u32 = <$ty>::BITS >> 2; + const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1; + + let lo = $n & LOWER_HALF_1_BITS; + let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS); + let denominator = ($s as $ty) << 1; + + let q = numerator / denominator; + let mut s = ($s << QUARTER_BITS) as $ty + q; + let (s_squared, overflow) = s.overflowing_mul(s); + if overflow || s_squared > $n { + s -= 1; + } + s + }}; +} + +/// Takes the normalized [`u16`](prim@u16) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u16_stages(n: u16) -> u16 { + let (s, r) = first_stage!(16, n); + last_stage!(u16, n, s, r) +} + +/// Takes the normalized [`u32`](prim@u32) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u32_stages(n: u32) -> u32 { + let (s, r) = first_stage!(32, n); + let (s, r) = middle_stage!(32, u16, n, s, r); + last_stage!(u32, n, s, r) +} + +/// Takes the normalized [`u64`](prim@u64) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u64_stages(n: u64) -> u64 { + let (s, r) = first_stage!(64, n); + let (s, r) = middle_stage!(64, u16, n, s, r); + let (s, r) = middle_stage!(64, u32, n, s, r); + last_stage!(u64, n, s, r) +} + +/// Takes the normalized [`u128`](prim@u128) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u128_stages(n: u128) -> u128 { + let (s, r) = first_stage!(128, n); + let (s, r) = middle_stage!(128, u16, n, s, r); + let (s, r) = middle_stage!(128, u32, n, s, r); + let (s, r) = middle_stage!(128, u64, n, s, r); + last_stage!(u128, n, s, r) +} + +unsigned_fn!(u16, u8, u16_stages); +unsigned_fn!(u32, u16, u32_stages); +unsigned_fn!(u64, u32, u64_stages); +unsigned_fn!(u128, u64, u128_stages); + +/// Instantiate this panic logic once, rather than for all the isqrt methods +/// on every single primitive type. +#[cold] +#[track_caller] +pub const fn panic_for_negative_argument() -> ! { + panic!("argument of integer square root cannot be negative") +} diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index 309e1ba958aee..e9e5324666ada 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -41,6 +41,7 @@ mod uint_macros; // import uint_impl! mod error; mod int_log10; +mod int_sqrt; mod nonzero; mod overflow_panic; mod saturating; diff --git a/core/src/num/nonzero.rs b/core/src/num/nonzero.rs index c6e9c249048a7..8b888f12da0b1 100644 --- a/core/src/num/nonzero.rs +++ b/core/src/num/nonzero.rs @@ -7,7 +7,7 @@ use crate::marker::{Freeze, StructuralPartialEq}; use crate::ops::{BitOr, BitOrAssign, Div, DivAssign, Neg, Rem, RemAssign}; use crate::panic::{RefUnwindSafe, UnwindSafe}; use crate::str::FromStr; -use crate::{fmt, hint, intrinsics, ptr, ub_checks}; +use crate::{fmt, intrinsics, ptr, ub_checks}; /// A marker trait for primitive types which can be zero. /// @@ -1545,31 +1545,14 @@ macro_rules! nonzero_integer_signedness_dependent_methods { without modifying the original"] #[inline] pub const fn isqrt(self) -> Self { - // The algorithm is based on the one presented in - // - // which cites as source the following C code: - // . - - let mut op = self.get(); - let mut res = 0; - let mut one = 1 << (self.ilog2() & !1); - - while one != 0 { - if op >= res + one { - op -= res + one; - res = (res >> 1) + one; - } else { - res >>= 1; - } - one >>= 2; - } + let result = self.get().isqrt(); - // SAFETY: The result fits in an integer with half as many bits. - // Inform the optimizer about it. - unsafe { hint::assert_unchecked(res < 1 << (Self::BITS / 2)) }; - - // SAFETY: The square root of an integer >= 1 is always >= 1. - unsafe { Self::new_unchecked(res) } + // SAFETY: Integer square root is a monotonically nondecreasing + // function, which means that increasing the input will never cause + // the output to decrease. Thus, since the input for nonzero + // unsigned integers has a lower bound of 1, the lower bound of the + // results will be sqrt(1), which is 1, so a result can't be zero. + unsafe { Self::new_unchecked(result) } } }; diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs index 0d0bbc5256f78..d9036abecc592 100644 --- a/core/src/num/uint_macros.rs +++ b/core/src/num/uint_macros.rs @@ -2762,10 +2762,24 @@ macro_rules! uint_impl { without modifying the original"] #[inline] pub const fn isqrt(self) -> Self { - match NonZero::new(self) { - Some(x) => x.isqrt().get(), - None => 0, + let result = crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT; + + // Inform the optimizer what the range of outputs is. If testing + // `core` crashes with no panic message and a `num::int_sqrt::u*` + // test failed, it's because your edits caused these assertions or + // the assertions in `fn isqrt` of `nonzero.rs` to become false. + // + // SAFETY: Integer square root is a monotonically nondecreasing + // function, which means that increasing the input will never + // cause the output to decrease. Thus, since the input for unsigned + // integers is bounded by `[0, <$ActualT>::MAX]`, sqrt(n) will be + // bounded by `[sqrt(0), sqrt(<$ActualT>::MAX)]`. + unsafe { + const MAX_RESULT: $SelfT = crate::num::int_sqrt::$ActualT(<$ActualT>::MAX) as $SelfT; + crate::hint::assert_unchecked(result <= MAX_RESULT); } + + result } /// Performs Euclidean division.