Skip to content

Commit

Permalink
Optimize atomic float on NVPTX
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Jan 12, 2025
1 parent 582a915 commit a8a8058
Show file tree
Hide file tree
Showing 6 changed files with 469 additions and 5 deletions.
39 changes: 37 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn main() {

if version.minor >= 80 {
println!(
r#"cargo:rustc-check-cfg=cfg(target_feature,values("experimental-zacas","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
r#"cargo:rustc-check-cfg=cfg(target_feature,values("experimental-zacas","sm_70","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
);

// Custom cfgs set by build script. Not public API.
Expand All @@ -58,7 +58,7 @@ fn main() {
// TODO: handle multi-line target_feature_fallback
// grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
println!(
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","experimental-zacas","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha"))"#
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","experimental-zacas","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","sm_70","v6","zaamo","zabha"))"#
);
}

Expand Down Expand Up @@ -175,6 +175,11 @@ fn main() {
println!("cargo:rustc-cfg=portable_atomic_unstable_asm_experimental_arch");
}
}
"nvptx64" => {
if version.nightly && is_allowed_feature("asm_experimental_arch") {
println!("cargo:rustc-cfg=portable_atomic_unstable_asm_experimental_arch");
}
}
_ => {}
}
}
Expand Down Expand Up @@ -435,6 +440,36 @@ fn main() {
// nand (nnr{,g}k), select (sel{,g}r), etc.
target_feature_fallback("miscellaneous-extensions-3", arch13_features);
}
"nvptx64" => {
let mut sm_70 = false;
if let Some(rustflags) = env::var_os("CARGO_ENCODED_RUSTFLAGS") {
for mut flag in rustflags.to_string_lossy().split('\x1f') {
flag = strip_prefix(flag, "-C").unwrap_or(flag);
if let Some(flag) = strip_prefix(flag, "target-feature=") {
for s in flag.split(',') {
// TODO: Handle cases where a specific target feature
// implicitly enables another target feature.
match (s.as_bytes().first(), s.get(1..)) {
(Some(b'+'), Some(f)) => {
if let Some(sm) = strip_prefix(f, "sm_") {
if let Ok(sm) = sm.parse::<u32>() {
if sm >= 70 {
sm_70 = true;
}
}
}
}
(Some(b'-'), Some(_f)) => {
// TODO
}
_ => {}
}
}
}
}
}
target_feature_fallback("sm_70", sm_70);
}
_ => {}
}
}
Expand Down
17 changes: 15 additions & 2 deletions src/imp/float/int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ Note that most of `fetch_*` operations of atomic floats are implemented using
CAS loops, which can be slower than equivalent operations of atomic integers.
AArch64 with FEAT_LSFE and GPU targets have atomic instructions for float.
Both will use architecture-specific implementations instead of this implementation in the
future: https://github.com/taiki-e/portable-atomic/issues/34 / https://github.com/taiki-e/portable-atomic/pull/45
See nvptx.rs for NVPTX.
AArch64 with FEAT_LSFE will also use architecture-specific implementations instead of this implementation in the
future: https://github.com/taiki-e/portable-atomic/pull/201
*/

// TODO: fetch_{minimum,maximum}* https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3008r2.html / https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
Expand Down Expand Up @@ -203,9 +204,21 @@ macro_rules! atomic_float {
cfg_has_atomic_16! {
atomic_float!(AtomicF16, f16, AtomicU16, u16, 2);
}
// Generic `AtomicF32`, generated by `atomic_float!` on top of `AtomicU32`.
// It is compiled out when all of the following hold: the target is nvptx64,
// the `sm_70` target feature is enabled (directly or via the build-script
// fallback cfg), neither the `miri` nor the `portable_atomic_sanitize_thread`
// cfg is set, and `portable_atomic_unstable_asm_experimental_arch` is set.
// See nvptx.rs for that configuration.
#[cfg(not(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
not(any(miri, portable_atomic_sanitize_thread)),
portable_atomic_unstable_asm_experimental_arch,
)))]
cfg_has_atomic_32! {
atomic_float!(AtomicF32, f32, AtomicU32, u32, 4);
}
// Generic `AtomicF64`, generated by `atomic_float!` on top of `AtomicU64`.
// Gated by the same condition as the generic `AtomicF32`: it is compiled out
// on nvptx64 with `sm_70` enabled, without the `miri` /
// `portable_atomic_sanitize_thread` cfgs, and with
// `portable_atomic_unstable_asm_experimental_arch` set.
// See nvptx.rs for that configuration.
#[cfg(not(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
not(any(miri, portable_atomic_sanitize_thread)),
portable_atomic_unstable_asm_experimental_arch,
)))]
cfg_has_atomic_64! {
atomic_float!(AtomicF64, f64, AtomicU64, u64, 8);
}
Expand Down
27 changes: 27 additions & 0 deletions src/imp/float/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,40 @@ Atomic float implementations

mod int;

#[cfg(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
not(any(miri, portable_atomic_sanitize_thread)),
portable_atomic_unstable_asm_experimental_arch,
))]
mod nvptx;

#[cfg(portable_atomic_unstable_f16)]
cfg_has_atomic_16! {
pub(crate) use self::int::AtomicF16;
}
// On nvptx64 with `sm_70` enabled (and inline asm for experimental
// architectures available, outside Miri / thread-sanitizer builds), re-export
// the float atomics from the architecture-specific `nvptx` module.
//
// They must not be taken from `self::int` here: int.rs gates its
// `AtomicF32`/`AtomicF64` on the negation of this exact cfg, so those items do
// not exist in this configuration and the import would fail to resolve.
#[cfg(all(
    target_arch = "nvptx64",
    any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
    not(any(miri, portable_atomic_sanitize_thread)),
    portable_atomic_unstable_asm_experimental_arch,
))]
pub(crate) use self::nvptx::{AtomicF32, AtomicF64};
#[cfg(not(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
not(any(miri, portable_atomic_sanitize_thread)),
portable_atomic_unstable_asm_experimental_arch,
)))]
cfg_has_atomic_32! {
pub(crate) use self::int::AtomicF32;
}
#[cfg(not(all(
target_arch = "nvptx64",
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
not(any(miri, portable_atomic_sanitize_thread)),
portable_atomic_unstable_asm_experimental_arch,
)))]
cfg_has_atomic_64! {
pub(crate) use self::int::AtomicF64;
}
Expand Down
Loading

0 comments on commit a8a8058

Please sign in to comment.