Skip to content

Commit

Permalink
Transpose
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Jun 11, 2024
1 parent 7828401 commit f08772c
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 1 deletion.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@ all = "deny"
[[bench]]
name = "bitpacking"
harness = false

[[bench]]
name = "transpose"
harness = false
24 changes: 24 additions & 0 deletions benches/transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#![allow(incomplete_features)]
#![feature(generic_const_exprs)]

use std::mem::size_of;

use criterion::{Criterion, criterion_group, criterion_main};

use fastlanes::{BitPacking, Transpose};

/// Criterion benchmark for `Transpose::transpose` on a 1024-element `u16` block.
///
/// The input is the ramp `0, 1, ..., 1023`; the modulo by `u16::MAX` never
/// wraps for indices this small, it only keeps the narrowing cast explicit.
fn transpose(c: &mut Criterion) {
    let mut group = c.benchmark_group("transpose");
    group.bench_function("transpose u16", |bencher| {
        // Build the input once, outside the timed closure.
        let values: [u16; 1024] =
            std::array::from_fn(|idx| (idx % u16::MAX as usize) as u16);

        // The output buffer is reused across iterations so only the permutation is timed.
        let mut transposed = [0; 1024];
        bencher.iter(|| Transpose::transpose(&values, &mut transposed));
    });
}

criterion_group!(benches, transpose);
criterion_main!(benches);
5 changes: 4 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ use std::mem::size_of;
use num_traits::{PrimInt, Unsigned};

mod bitpacking;
mod transpose;

pub use bitpacking::*;
pub use transpose::*;

pub const ORDER: [u8; 8] = [0, 4, 2, 6, 1, 5, 3, 7];
pub const FL_ORDER: [u8; 8] = [0, 4, 2, 6, 1, 5, 3, 7];

pub trait FastLanes: Sized + Unsigned + PrimInt {
const T: usize = size_of::<Self>() * 8;
Expand Down
57 changes: 57 additions & 0 deletions src/transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use seq_macro::seq;
use crate::{FastLanes, FL_ORDER};

/// Fixed permutation of a 1024-element block, driven by precomputed index tables.
///
/// A blanket implementation in this file provides the trait for every
/// [`FastLanes`] type.
pub trait Transpose: FastLanes {
/// Index table built by `transpose_mask()`; the blanket impl reads it as
/// `output[i] = input[MASK[i]]`.
const MASK: [usize; 1024] = transpose_mask();
/// Index table built by `untranspose_mask()`; the blanket impl reads it as
/// `output[i] = input[UNMASK[i]]`.
const UNMASK: [usize; 1024] = untranspose_mask();

/// Writes the permutation of `input` selected by [`Self::MASK`] into `output`.
fn transpose(input: &[Self; 1024], output: &mut [Self; 1024]);
/// Writes the permutation of `input` selected by [`Self::UNMASK`] into `output`.
fn untranspose(input: &[Self; 1024], output: &mut [Self; 1024]);
}

// Blanket implementation: every `FastLanes` type gets `Transpose`, using the
// trait's default `MASK` / `UNMASK` tables.
impl<T> Transpose for T where T: FastLanes {
// NOTE(review): `inline(never)` presumably keeps the 1024 unrolled copies
// below from being duplicated into every call site — confirm intent.
#[inline(never)]
fn transpose(input: &[Self; 1024], output: &mut [Self; 1024]) {
// `seq!` repeats the body once per `i` in `0..1024`, so this is 1024
// straight-line copies `output[i] = input[MASK[i]]` rather than a loop.
seq!(i in 0..1024 {
// SAFETY: `i` ranges over 0..1024 and `output` has exactly 1024 elements.
// `MASK` comes from `transpose_mask()`, whose largest entry is
// 15 * 64 + 7 * 8 + 7 = 1023, so the read from `input` is in bounds too.
unsafe { *output.get_unchecked_mut(i) = *input.get_unchecked(Self::MASK[i]) };
});
}

// NOTE(review): same `inline(never)` question as on `transpose` — confirm intent.
#[inline(never)]
fn untranspose(input: &[Self; 1024], output: &mut [Self; 1024]) {
// Same unrolled shape as `transpose`, reading indices from `UNMASK`.
seq!(i in 0..1024 {
// SAFETY: `i` ranges over 0..1024 and `output` has exactly 1024 elements.
// `UNMASK` comes from `untranspose_mask()`, which is derived from the
// `transpose_mask()` table whose entries never exceed 1023, so the read
// from `input` is in bounds.
unsafe { *output.get_unchecked_mut(i) = *input.get_unchecked(Self::UNMASK[i]) };
});
}
}

/// Builds the 1024-entry source-index table used by `Transpose::transpose`.
///
/// Output slots are numbered with `row` outermost (8 values), then the
/// position in `FL_ORDER`, then `lane` innermost (16 values). Slot
/// `(row, order, lane)` holds the source index
/// `lane * 64 + FL_ORDER[order] * 8 + row`.
const fn transpose_mask() -> [usize; 1024] {
    const ROWS: usize = 8;
    const LANES: usize = 16;
    let orders = FL_ORDER.len();

    let mut table = [0usize; 1024];
    let mut slot = 0;
    // `for` is not usable in a `const fn`, so walk one flat counter and
    // decode the three loop coordinates from it.
    while slot < ROWS * orders * LANES {
        let lane = slot % LANES;
        let order = (slot / LANES) % orders;
        let row = slot / (LANES * orders);
        table[slot] = lane * 64 + (FL_ORDER[order] as usize) * 8 + row;
        slot += 1;
    }
    table
}

/// Builds the index table that undoes `transpose_mask()`.
///
/// `transpose` computes `output[i] = input[MASK[i]]`, so the element that
/// started at position `MASK[i]` ends up at position `i`. To restore it,
/// `untranspose` must read position `i` when writing position `MASK[i]`,
/// which means `UNMASK[MASK[i]] = i`.
///
/// The forward table is not its own inverse (for example `MASK[1] == 64`
/// but `MASK[64] == 8`), so copying it verbatim would not round-trip.
const fn untranspose_mask() -> [usize; 1024] {
    const MASK: [usize; 1024] = transpose_mask();
    let mut mask = [0; 1024];
    let mut mask_idx = 0;
    while mask_idx < 1024 {
        // Scatter rather than gather: `MASK` is a permutation of 0..1024,
        // so every slot of `mask` is written exactly once.
        mask[MASK[mask_idx]] = mask_idx;
        mask_idx += 1;
    }
    mask
}

0 comments on commit f08772c

Please sign in to comment.