Skip to content

Commit

Permalink
crc: progress
Browse files Browse the repository at this point in the history
  • Loading branch information
BurntSushi committed Mar 7, 2020
1 parent 13d8b4e commit 423534b
Show file tree
Hide file tree
Showing 9 changed files with 412 additions and 19 deletions.
124 changes: 124 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
use std::env;
use std::fs::File;
use std::io::{self, Write};
use std::path::{Path, PathBuf};

const CASTAGNOLI_POLY: u32 = 0x82f63b78;

type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;

/// Build-script entry point.
///
/// Runs the table generation and aborts the build by panicking with the
/// error's display text if anything goes wrong.
fn main() {
    try_main().unwrap_or_else(|err| panic!("{}", err));
}

/// Resolves the output directory from the `OUT_DIR` environment variable and
/// writes every generated table into it.
///
/// Returns an error if `OUT_DIR` is not set or if either table file cannot be
/// written.
fn try_main() -> Result<()> {
    let out_dir = env::var_os("OUT_DIR")
        .map(PathBuf::from)
        .ok_or("OUT_DIR environment variable not defined")?;
    write_tag_lookup_table(&out_dir)?;
    write_crc_tables(&out_dir)
}

/// Generates `tag.rs` inside `out_dir`.
///
/// The file defines `TAG_LOOKUP_TABLE`, a 256-entry `u16` array holding the
/// `tag_entry` value for every possible tag byte, in order.
///
/// Returns an error if the file cannot be created or completely written.
fn write_tag_lookup_table(out_dir: &Path) -> Result<()> {
    let out_path = out_dir.join("tag.rs");
    let mut out = io::BufWriter::new(File::create(out_path)?);

    writeln!(out, "pub const TAG_LOOKUP_TABLE: [u16; 256] = [")?;
    for b in 0u8..=255 {
        writeln!(out, " {},", tag_entry(b))?;
    }
    writeln!(out, "];")?;

    // Flush explicitly: dropping a `BufWriter` discards any error from the
    // final write, which would leave a silently truncated table on disk.
    out.flush()?;
    Ok(())
}

/// Computes the lookup-table entry for the tag byte `b`.
///
/// The low two bits of `b` select the tag kind and the upper six bits feed
/// the length. The returned entry packs:
///
/// * bits 0..=7: the length (zero for kind `0b00` when the length exceeds 60),
/// * bits 8..=10: the top three bits of `b`, for kind `0b01` only,
/// * bits 11 and up: a small per-kind count: `length - 60` for long `0b00`
///   entries, and `1`, `2`, `4` for kinds `0b01`, `0b10`, `0b11` (presumably
///   the number of extra bytes following the tag; confirm against the
///   decoder that consumes this table).
fn tag_entry(b: u8) -> u16 {
    let byte = u16::from(b);
    let kind = byte & 0b11;
    let upper = byte >> 2;

    if kind == 0b00 {
        let lit_len = upper + 1;
        if lit_len > 60 {
            assert!(lit_len <= 64);
            return (lit_len - 60) << 11;
        }
        return lit_len;
    }

    if kind == 0b01 {
        let len = 4 + (upper & 0b111);
        let offset = (byte >> 5) & 0b111;
        return (1 << 11) | (offset << 8) | len;
    }

    // Only kinds 0b10 and 0b11 remain; they differ solely in the count field.
    let count: u16 = if kind == 0b10 { 2 } else { 4 };
    (count << 11) | (upper + 1)
}

/// Generates `crc32_table.rs` inside `out_dir`.
///
/// The file defines two constants built from `CASTAGNOLI_POLY`: `TABLE`, the
/// 256-entry table from `make_table`, and `TABLE16`, the 16x256 table from
/// `make_table16`.
///
/// Returns an error if the file cannot be created, written or flushed.
fn write_crc_tables(out_dir: &Path) -> Result<()> {
    let file = File::create(out_dir.join("crc32_table.rs"))?;
    let mut out = io::BufWriter::new(file);

    writeln!(out, "pub const TABLE: [u32; 256] = [")?;
    for entry in make_table(CASTAGNOLI_POLY).iter() {
        writeln!(out, " {},", entry)?;
    }
    writeln!(out, "];\n")?;

    writeln!(out, "pub const TABLE16: [[u32; 256]; 16] = [")?;
    for row in make_table16(CASTAGNOLI_POLY).iter() {
        writeln!(out, " [")?;
        for entry in row.iter() {
            writeln!(out, " {},", entry)?;
        }
        writeln!(out, " ],")?;
    }
    writeln!(out, "];")?;

    // Surface any buffered write error instead of losing it on drop.
    out.flush()?;
    Ok(())
}

/// Builds sixteen 256-entry CRC tables for the polynomial `poly`.
///
/// Row 0 is the table from `make_table`. Each later row is derived from the
/// previous one: entry `i` of row `j` is the row `j - 1` value shifted right
/// by one byte and combined with the row-0 entry selected by the byte that
/// was shifted out.
fn make_table16(poly: u32) -> [[u32; 256]; 16] {
    let base = make_table(poly);
    let mut tab = [[0u32; 256]; 16];
    tab[0] = base;
    for (i, &seed) in base.iter().enumerate() {
        let mut crc = seed;
        for row in tab.iter_mut().skip(1) {
            crc = (crc >> 8) ^ base[usize::from(crc as u8)];
            row[i] = crc;
        }
    }
    tab
}

/// Builds the 256-entry CRC table for the polynomial `poly`.
///
/// Entry `i` is the result of pushing the value `i` through eight rounds of
/// "shift right by one bit, then XOR in `poly` if the bit shifted out was 1".
fn make_table(poly: u32) -> [u32; 256] {
    let mut tab = [0u32; 256];
    for (i, slot) in tab.iter_mut().enumerate() {
        *slot = (0..8).fold(i as u32, |crc, _| {
            let shifted = crc >> 1;
            if crc & 1 == 0 {
                shifted
            } else {
                shifted ^ poly
            }
        });
    }
    tab
}
120 changes: 120 additions & 0 deletions src/bytes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#![allow(warnings)]

use std::convert::TryInto;
use std::io;

/// Decodes a little endian u16 from the first two bytes of the given slice.
///
/// # Panics
///
/// Panics if the slice has length less than 2.
pub fn read_u16_le(slice: &[u8]) -> u16 {
    let head = &slice[..2];
    u16::from(head[0]) | (u16::from(head[1]) << 8)
}

/// Decodes a little endian u24 from the first three bytes of the given slice.
///
/// The value is returned as a u32 whose most significant 8 bits are always 0.
///
/// # Panics
///
/// Panics if the slice has length less than 3.
pub fn read_u24_le(slice: &[u8]) -> u32 {
    let (lo, mid, hi) = (slice[0], slice[1], slice[2]);
    u32::from_le_bytes([lo, mid, hi, 0])
}

/// Decodes a little endian u32 from the first four bytes of the given slice.
///
/// # Panics
///
/// Panics if the slice has length less than 4.
pub fn read_u32_le(slice: &[u8]) -> u32 {
    let mut buf = [0u8; 4];
    buf.copy_from_slice(&slice[..4]);
    u32::from_le_bytes(buf)
}

/// Reads exactly four bytes from `rdr` and decodes them as a little endian
/// u32.
///
/// # Errors
///
/// Propagates any error from `read_exact`; in particular, a reader that
/// yields fewer than 4 bytes produces an unexpected EOF error.
pub fn io_read_u32_le<R: io::Read>(mut rdr: R) -> io::Result<u32> {
    let mut bytes = [0u8; 4];
    rdr.read_exact(&mut bytes).map(|()| u32::from_le_bytes(bytes))
}

/// Encodes `n` in little endian format into the first two bytes of the given
/// slice. Any remaining bytes are left untouched.
///
/// # Panics
///
/// Panics if the slice has length less than 2.
pub fn write_u16_le(n: u16, slice: &mut [u8]) {
    assert!(slice.len() >= 2);
    slice[..2].copy_from_slice(&n.to_le_bytes());
}

/// Write a u24 (given as a u32 where the most significant 8 bits are ignored)
/// in little endian format to the beginning of the given slice. Any bytes
/// past the first three are left untouched.
///
/// # Panics
///
/// Panics if the slice has length less than 3. The length is checked before
/// anything is written, so a slice that is too short is never partially
/// overwritten.
pub fn write_u24_le(n: u32, slice: &mut [u8]) {
    // Check up front, like the u16/u32 writers, so a short slice is rejected
    // before any byte is modified.
    assert!(slice.len() >= 3);
    slice[..3].copy_from_slice(&n.to_le_bytes()[..3]);
}

/// Encodes `n` in little endian format into the first four bytes of the given
/// slice. Any remaining bytes are left untouched.
///
/// # Panics
///
/// Panics if the slice has length less than 4.
pub fn write_u32_le(n: u32, slice: &mut [u8]) {
    assert!(slice.len() >= 4);
    slice[..4].copy_from_slice(&n.to_le_bytes());
}

/// Encodes `n` as a varint at the beginning of `data` and returns the number
/// of bytes written.
///
/// Each output byte carries seven bits of `n`, least significant group first;
/// every byte except the last has its high bit set. See
/// https://developers.google.com/protocol-buffers/docs/encoding#varints
///
/// # Panics
///
/// Panics if `data` is too short to hold the encoding (at most 10 bytes are
/// needed for a u64). Bytes before the point of failure have already been
/// written by then.
pub fn write_varu64(data: &mut [u8], mut n: u64) -> usize {
    let mut written = 0;
    loop {
        let low7 = (n & 0b0111_1111) as u8;
        n >>= 7;
        if n == 0 {
            // Final group: high bit clear marks the end of the varint.
            data[written] = low7;
            return written + 1;
        }
        data[written] = low7 | 0b1000_0000;
        written += 1;
    }
}

/// Decodes a varint from the beginning of `data`.
///
/// Returns the decoded value together with the number of bytes consumed. On
/// failure, `(0, 0)` is returned; a successful decode always consumes at
/// least one byte, so a zero length unambiguously signals an error. Failure
/// happens when:
///
/// * `data` ends before a byte with a clear high bit is seen (this includes
///   empty input), or
/// * the encoded value does not fit in a u64.
///
/// See https://developers.google.com/protocol-buffers/docs/encoding#varints
pub fn read_varu64(data: &[u8]) -> (u64, usize) {
    let mut n: u64 = 0;
    let mut shift: u32 = 0;
    for (i, &b) in data.iter().enumerate() {
        let bits = u64::from(b & 0b0111_1111);
        // `checked_shl` only rejects shift amounts of 64 or more; it does not
        // notice payload bits pushed off the top (e.g. a 10th byte greater
        // than 1). Shifting back and comparing catches that overflow too.
        let shifted = match bits.checked_shl(shift) {
            Some(shifted) if shifted >> shift == bits => shifted,
            _ => return (0, 0),
        };
        n |= shifted;
        if b < 0b1000_0000 {
            // High bit clear: this was the final byte of the varint.
            return (n, i + 1);
        }
        shift += 7;
    }
    // Ran out of input without seeing a terminating byte.
    (0, 0)
}

/// Loads a little endian encoded u32 from a possibly unaligned address.
///
/// # Safety
///
/// `data` must point to some memory of size at least 4.
pub unsafe fn loadu_u32_le(data: *const u8) -> u32 {
    // The bytes in memory are little endian; convert them to a native value.
    u32::from_le(loadu_u32_ne(data))
}

/// Loads a native endian encoded u32 from a possibly unaligned address.
///
/// # Safety
///
/// `data` must point to some memory of size at least 4.
pub unsafe fn loadu_u32_ne(data: *const u8) -> u32 {
    // SAFETY: the caller guarantees at least 4 readable bytes at `data`, and
    // `read_unaligned` places no alignment requirement on the pointer.
    unsafe { std::ptr::read_unaligned(data as *const u32) }
}

/// Loads a little endian encoded u64 from a possibly unaligned address.
///
/// # Safety
///
/// `data` must point to some memory of size at least 8.
pub unsafe fn loadu_u64_le(data: *const u8) -> u64 {
    // The bytes in memory are little endian; convert them to a native value.
    u64::from_le(loadu_u64_ne(data))
}

/// Loads a native endian encoded u64 from a possibly unaligned address.
///
/// # Safety
///
/// `data` must point to some memory of size at least 8.
pub unsafe fn loadu_u64_ne(data: *const u8) -> u64 {
    // SAFETY: the caller guarantees at least 8 readable bytes at `data`, and
    // `read_unaligned` places no alignment requirement on the pointer.
    unsafe { std::ptr::read_unaligned(data as *const u64) }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ pub use crate::map::{Map, MapBuilder};
pub use crate::set::{Set, SetBuilder};
pub use crate::stream::{IntoStreamer, Streamer};

mod bytes;
mod error;
#[path = "automaton/mod.rs"]
mod inner_automaton;
Expand Down
10 changes: 7 additions & 3 deletions src/raw/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::io::{self, Write};
use std::io;

use byteorder::{LittleEndian, WriteBytesExt};

Expand Down Expand Up @@ -219,8 +219,12 @@ impl<W: io::Write> Builder<W> {
let root_addr = self.compile(&root_node)?;
self.wtr.write_u64::<LittleEndian>(self.len as u64)?;
self.wtr.write_u64::<LittleEndian>(root_addr as u64)?;
self.wtr.flush()?;
Ok(self.wtr.into_inner())

let sum = self.wtr.masked_checksum();
let mut wtr = self.wtr.into_inner();
wtr.write_u32::<LittleEndian>(sum)?;
wtr.flush()?;
Ok(wtr)
}

fn insert_output<B>(&mut self, bs: B, out: Option<Output>) -> Result<()>
Expand Down
17 changes: 15 additions & 2 deletions src/raw/counting_writer.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
use std::io;

/// Wraps any writer and counts bytes written.
use crate::raw::crc32::CheckSummer;

/// Wraps any writer, counting and checksumming the bytes written through it.
pub struct CountingWriter<W> {
// The wrapped writer that receives the bytes.
wtr: W,
// Running total of the byte counts returned by the wrapped writer.
cnt: u64,
// Checksum state, fed each buffer passed to `write`.
summer: CheckSummer,
}

impl<W: io::Write> CountingWriter<W> {
/// Wrap the given writer with a counter.
pub fn new(wtr: W) -> CountingWriter<W> {
CountingWriter { wtr, cnt: 0 }
CountingWriter { wtr, cnt: 0, summer: CheckSummer::new() }
}

/// Return the total number of bytes written to the underlying writer.
Expand All @@ -20,6 +23,15 @@ impl<W: io::Write> CountingWriter<W> {
self.cnt
}

/// Returns the masked CRC32C checksum of the bytes written so far.
///
/// This "masked" checksum is the same one used by the Snappy frame format.
/// Masking is supposed to make the checksum robust with respect to data
/// that contains the checksum itself.
///
/// NOTE(review): `write` feeds each buffer to the checksummer in full before
/// forwarding it to the inner writer, so if the inner writer accepts only
/// part of a buffer (or fails), the checksum covers bytes that the byte
/// count does not. Confirm this is intended.
pub fn masked_checksum(&self) -> u32 {
self.summer.masked()
}

/// Unwrap the counting writer and return the inner writer.
pub fn into_inner(self) -> W {
self.wtr
Expand All @@ -33,6 +45,7 @@ impl<W: io::Write> CountingWriter<W> {

impl<W: io::Write> io::Write for CountingWriter<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.summer.update(buf);
let n = self.wtr.write(buf)?;
self.cnt += n as u64;
Ok(n)
Expand Down
Loading

0 comments on commit 423534b

Please sign in to comment.