Skip to content

Commit

Permalink
Use slices rather than vectors in ZopfliHash (std only) (#27)
Browse files Browse the repository at this point in the history
* Optimize HashThing to use slices rather than Vec

(cherry picked from commit 281f38a)

* Fix stack overflow by building HashThing and ZopfliHash on heap

(cherry picked from commit a4ac5de)

* cargo fmt

(cherry picked from commit 0f3731c)

* Refactor: build the entire ZopfliHash in the unsafe code

* Bug fix: must explicitly import Box on nightly compiler

* Bug fix: use old code when `std` isn't enabled

* Bug fix

* Check for null and convert to Box<> before initializing

* refactor: drop `once_cell` dependency, apply to non-std, improve heap usage

* chore: fix no-std build

---------

Co-authored-by: Chris Hennick <[email protected]>
Co-authored-by: Alejandro González <[email protected]>
  • Loading branch information
3 people authored Jul 5, 2023
1 parent 74e43af commit 21d5521
Showing 1 changed file with 59 additions and 40 deletions.
99 changes: 59 additions & 40 deletions src/hash.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
use alloc::vec::Vec;
use alloc::{
alloc::{alloc, handle_alloc_error, Layout},
boxed::Box,
};
use core::ptr::{addr_of, addr_of_mut, NonNull};

use crate::util::{ZOPFLI_MIN_MATCH, ZOPFLI_WINDOW_MASK, ZOPFLI_WINDOW_SIZE};

Expand All @@ -11,47 +15,20 @@ pub enum Which {
Hash2,
}

/// One hash-chain entry; `HashThing` keeps one of these for every window position.
///
/// `ZopfliHash::init` starts every entry with `prev` equal to the entry's own position and
/// `hashval` set to `None`.
#[derive(Clone)]
pub struct SmallerHashThing {
prev: u16, /* Index of the previous occurrence of the same hash. */
hashval: Option<u16>, /* Hash value stored for this index (`None` when absent). */
}

/// Lookup state for one of the two hashes (`hash1` / `hash2`) kept by `ZopfliHash`.
///
/// Every `head` slot is initialized to -1 and `val` to 0.
// NOTE(review): `head` and `prev_and_hashval` are each declared twice below, once as a `Vec` and
// once as a fixed-size array. Presumably the `Vec` lines are the pre-change side of this diff and
// only the array declarations remain in the committed file — confirm.
#[derive(Clone)]
pub struct HashThing {
head: Vec<i16>, /* Hash value to index of its most recent occurrence. */
prev_and_hashval: Vec<SmallerHashThing>,
head: [i16; 65536], /* Hash value to index of its most recent occurrence. */
prev_and_hashval: [SmallerHashThing; ZOPFLI_WINDOW_SIZE], /* One chain entry per window position. */
val: u16, /* Current hash value. */
}

impl HashThing {
fn new() -> HashThing {
HashThing {
head: vec![-1; 65536],
prev_and_hashval: (0..ZOPFLI_WINDOW_SIZE)
.map(|p| SmallerHashThing {
prev: p as u16,
hashval: None,
})
.collect(),
val: 0,
}
}

/// Puts the table back into the state `new` produces, reusing the existing storage.
fn reset(&mut self) {
    for slot in self.head.iter_mut() {
        *slot = -1;
    }

    // The counter takes the type of `prev` (u16), so every entry ends up pointing at its own
    // position in the window.
    let mut position = 0;
    for entry in self.prev_and_hashval.iter_mut() {
        *entry = SmallerHashThing {
            prev: position,
            hashval: None,
        };
        position += 1;
    }

    self.val = 0;
}

fn update(&mut self, hpos: usize) {
let hashval = self.val;
let index = self.val as usize;
Expand All @@ -74,25 +51,67 @@ impl HashThing {
}
}

/// Hash state made of two `HashThing` tables plus `same`, a per-window-position count of
/// repeated bytes.
///
/// `ZopfliHash::new` hands instances out boxed, and `ZopfliHash::init` writes every field.
#[derive(Clone)]
pub struct ZopfliHash {
hash1: HashThing,
hash2: HashThing,
pub same: [u16; ZOPFLI_WINDOW_SIZE], /* Amount of repetitions of same byte after this. */
}

impl ZopfliHash {
pub fn new() -> ZopfliHash {
ZopfliHash {
hash1: HashThing::new(),
hash2: HashThing::new(),
same: [0; ZOPFLI_WINDOW_SIZE],
/// Creates a `ZopfliHash` in its initial state, allocated directly on the heap.
///
/// The value is initialized in place through `init` rather than being assembled on the stack
/// and then moved into a `Box`.
pub fn new() -> Box<ZopfliHash> {
    const LAYOUT: Layout = Layout::new::<ZopfliHash>();

    // SAFETY: `LAYOUT` has a non-zero size because `ZopfliHash` contains the 65536-element
    // `head` arrays.
    let raw = unsafe { alloc(LAYOUT) } as *mut ZopfliHash;
    let hash = match NonNull::new(raw) {
        Some(hash) => hash,
        // A null pointer from the allocator means the allocation failed.
        None => handle_alloc_error(LAYOUT),
    };

    // SAFETY: `hash` is non-null and was just allocated with the layout of `ZopfliHash`, so it
    // is aligned and valid for writes, as `init` requires. Once `init` has written every field,
    // the allocation holds a fully initialized value that `Box` can take ownership of.
    unsafe {
        Self::init(hash);
        Box::from_raw(hash.as_ptr())
    }
}

/// Initializes the [`ZopfliHash`] instance pointed to by `hash` to its initial state.
///
/// Every field is written without being read first, so the target memory may be uninitialized
/// on entry.
///
/// ## Safety
/// `hash` must point to aligned memory that is valid for writes of a whole [`ZopfliHash`].
unsafe fn init(hash: NonNull<Self>) {
let hash = hash.as_ptr();

// SAFETY: the addr_of!/addr_of_mut! macros are used to avoid creating intermediate references,
// which are undefined behavior when the data is uninitialized. Note that it is also UB
// to assume that integer values and arrays can be read right after allocating their
// memory: the allocator returns valid, but uninitialized pointers that are not
// guaranteed to hold a fixed bit pattern (cf. the core::mem::MaybeUninit docs and
// https://doc.rust-lang.org/std/ptr/index.html#safety).

for i in 0..ZOPFLI_WINDOW_SIZE {
// Arrays are guaranteed to be laid out with their elements placed in consecutive
// memory positions: https://doc.rust-lang.org/reference/type-layout.html#array-layout.
// Therefore, a pointer to an array has the same address as the pointer to its first
// element, and advancing that pointer by one element (size_of::<SmallerHashThing>() bytes,
// which is what `.add(1)` does) yields the address of the second element, and so on.
let prev_and_hashval =
(addr_of_mut!((*hash).hash1.prev_and_hashval) as *mut SmallerHashThing).add(i);
// Each entry starts out pointing at its own position, with no hash value recorded.
addr_of_mut!((*prev_and_hashval).prev).write(i as u16);
addr_of_mut!((*prev_and_hashval).hashval).write(None);
}

// Rust signed integers are guaranteed to be represented in two's complement notation:
// https://doc.rust-lang.org/reference/types/numeric.html#integer-types
// In this notation, -1 is expressed as an all-ones value. The pointer below points to the
// whole array, so a count of 1 fills size_of::<[i16; 65536]>() bytes with 0xFF, which
// initializes every element to -1.
addr_of_mut!((*hash).hash1.head).write_bytes(0xFF, 1);
addr_of_mut!((*hash).hash1.val).write(0);

// hash1 is fully written at this point; hash2 starts out as a byte-for-byte copy of it.
addr_of_mut!((*hash).hash2).copy_from_nonoverlapping(addr_of!((*hash).hash1), 1);

// Zero-initializes all the array elements
addr_of_mut!((*hash).same).write_bytes(0, 1);
}

/// Restores this hash to its initial state.
// NOTE(review): this span shows both the field-by-field resets and the `init` call; presumably
// the first three statements are the pre-change side of the diff and only the `init` call
// remains in the committed file — confirm.
pub fn reset(&mut self) {
self.hash1.reset();
self.hash2.reset();
self.same = [0; ZOPFLI_WINDOW_SIZE];
// SAFETY: `self` is a live `&mut Self`, so the pointer derived from it is non-null, aligned
// and valid for writes, which is what `init` requires.
unsafe { Self::init(NonNull::new(self).unwrap()) }
}

pub fn warmup(&mut self, arr: &[u8], pos: usize, end: usize) {
Expand Down

0 comments on commit 21d5521

Please sign in to comment.