-
Notifications
You must be signed in to change notification settings - Fork 294
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #592 from heiher/loong-lsx
Add 128-bit SIMD implementation for LoongArch
- Loading branch information
Showing
3 changed files
with
149 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
use super::super::{BitMask, Tag}; | ||
use core::mem; | ||
use core::num::NonZeroU16; | ||
|
||
use core::arch::loongarch64::*; | ||
use mem::transmute; | ||
|
||
/// Integer type holding the raw bits of a `BitMask` for this backend.
pub(crate) type BitMaskWord = u16;
/// Non-zero counterpart of [`BitMaskWord`].
pub(crate) type NonZeroBitMaskWord = NonZeroU16;
/// NOTE(review): presumably the number of mask bits used per tag (the LSX
/// masks built in this file carry one bit per byte lane) - confirm against
/// the `BitMask` implementation.
pub(crate) const BITMASK_STRIDE: usize = 1;
/// All 16 bits set (`0xffff`).
/// NOTE(review): presumably the set of meaningful bits in a mask word -
/// confirm against the `BitMask` implementation.
pub(crate) const BITMASK_MASK: BitMaskWord = BitMaskWord::MAX;
/// All bits set (`!0`).
/// NOTE(review): presumably applied when iterating over a mask - confirm
/// against the `BitMask` implementation.
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = BitMaskWord::MAX;
|
||
/// Abstraction over a group of control tags which can be scanned in | ||
/// parallel. | ||
/// | ||
/// This implementation uses a 128-bit LSX value. | ||
#[derive(Copy, Clone)] | ||
pub(crate) struct Group(v16i8); | ||
|
||
// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859 | ||
#[allow(clippy::use_self)] | ||
impl Group { | ||
/// Number of bytes in the group. | ||
pub(crate) const WIDTH: usize = mem::size_of::<Self>(); | ||
|
||
/// Returns a full group of empty tags, suitable for use as the initial | ||
/// value for an empty hash table. | ||
/// | ||
/// This is guaranteed to be aligned to the group size. | ||
#[inline] | ||
#[allow(clippy::items_after_statements)] | ||
pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { | ||
#[repr(C)] | ||
struct AlignedTags { | ||
_align: [Group; 0], | ||
tags: [Tag; Group::WIDTH], | ||
} | ||
const ALIGNED_TAGS: AlignedTags = AlignedTags { | ||
_align: [], | ||
tags: [Tag::EMPTY; Group::WIDTH], | ||
}; | ||
&ALIGNED_TAGS.tags | ||
} | ||
|
||
/// Loads a group of tags starting at the given address. | ||
#[inline] | ||
#[allow(clippy::cast_ptr_alignment)] // unaligned load | ||
pub(crate) unsafe fn load(ptr: *const Tag) -> Self { | ||
Group(lsx_vld::<0>(ptr.cast())) | ||
} | ||
|
||
/// Loads a group of tags starting at the given address, which must be | ||
/// aligned to `mem::align_of::<Group>()`. | ||
#[inline] | ||
#[allow(clippy::cast_ptr_alignment)] | ||
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { | ||
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0); | ||
Group(lsx_vld::<0>(ptr.cast())) | ||
} | ||
|
||
/// Stores the group of tags to the given address, which must be | ||
/// aligned to `mem::align_of::<Group>()`. | ||
#[inline] | ||
#[allow(clippy::cast_ptr_alignment)] | ||
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { | ||
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0); | ||
lsx_vst::<0>(self.0, ptr.cast()); | ||
} | ||
|
||
/// Returns a `BitMask` indicating all tags in the group which have | ||
/// the given value. | ||
#[inline] | ||
pub(crate) fn match_tag(self, tag: Tag) -> BitMask { | ||
#[allow(clippy::missing_transmute_annotations)] | ||
unsafe { | ||
let cmp = lsx_vseq_b(self.0, lsx_vreplgr2vr_b(tag.0 as i32)); | ||
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(cmp))) as u16) | ||
} | ||
} | ||
|
||
/// Returns a `BitMask` indicating all tags in the group which are | ||
/// `EMPTY`. | ||
#[inline] | ||
pub(crate) fn match_empty(self) -> BitMask { | ||
#[allow(clippy::missing_transmute_annotations)] | ||
unsafe { | ||
let cmp = lsx_vseqi_b::<{ Tag::EMPTY.0 as i8 as i32 }>(self.0); | ||
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(cmp))) as u16) | ||
} | ||
} | ||
|
||
/// Returns a `BitMask` indicating all tags in the group which are | ||
/// `EMPTY` or `DELETED`. | ||
#[inline] | ||
pub(crate) fn match_empty_or_deleted(self) -> BitMask { | ||
#[allow(clippy::missing_transmute_annotations)] | ||
unsafe { | ||
// A tag is EMPTY or DELETED iff the high bit is set | ||
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(self.0))) as u16) | ||
} | ||
} | ||
|
||
/// Returns a `BitMask` indicating all tags in the group which are full. | ||
#[inline] | ||
pub(crate) fn match_full(&self) -> BitMask { | ||
#[allow(clippy::missing_transmute_annotations)] | ||
unsafe { | ||
// A tag is EMPTY or DELETED iff the high bit is set | ||
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskgez_b(self.0))) as u16) | ||
} | ||
} | ||
|
||
/// Performs the following transformation on all tags in the group: | ||
/// - `EMPTY => EMPTY` | ||
/// - `DELETED => EMPTY` | ||
/// - `FULL => DELETED` | ||
#[inline] | ||
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { | ||
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 | ||
// and high_bit = 0 (FULL) to 1000_0000 | ||
// | ||
// Here's this logic expanded to concrete values: | ||
// let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) | ||
// 1111_1111 | 1000_0000 = 1111_1111 | ||
// 0000_0000 | 1000_0000 = 1000_0000 | ||
#[allow(clippy::missing_transmute_annotations)] | ||
unsafe { | ||
let zero = lsx_vreplgr2vr_b(0); | ||
let special = lsx_vslt_b(self.0, zero); | ||
Group(transmute(lsx_vor_v( | ||
transmute(special), | ||
transmute(lsx_vreplgr2vr_b(Tag::DELETED.0 as i32)), | ||
))) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters