From 3ea8c3dcdb3937fb6102c16627c52ce74ac63d13 Mon Sep 17 00:00:00 2001 From: Jonathan Giddy Date: Sun, 7 May 2023 07:32:27 +0100 Subject: [PATCH] Fix GzDecoder Write partial filenames and comments If the gzip header contains an optional filename or comment but they are not completely contained in the buffer sent to a `write::GzDecoder`, then a valid header is created, missing data from these optional sections. A subsequent write call will treat the remaining header as encoded data and attempt to decode it, generally causing a panic. This change rewrites the header parsing code to handle partial headers correctly for both `Read` (where `WouldBlock` is handled specially) and `Write` (where `UnexpectedEof` is handled specially). --- src/gz/bufread.rs | 284 +++++------------------------------- src/gz/mod.rs | 356 ++++++++++++++++++++++------------------------ src/gz/write.rs | 105 ++++++++------ 3 files changed, 272 insertions(+), 473 deletions(-) diff --git a/src/gz/bufread.rs b/src/gz/bufread.rs index 8db25605..b59bf21c 100644 --- a/src/gz/bufread.rs +++ b/src/gz/bufread.rs @@ -3,11 +3,7 @@ use std::io; use std::io::prelude::*; use std::mem; -use super::corrupt; -use super::read_gz_header_part; -use super::Buffer; -use super::GzHeaderPartial; -use super::{GzBuilder, GzHeader}; +use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; use crate::crc::CrcReader; use crate::deflate; use crate::Compression; @@ -209,7 +205,7 @@ pub struct GzDecoder { #[derive(Debug)] enum GzState { - Header(GzHeaderPartial), + Header(GzHeaderParser), Body(GzHeader), Finished(GzHeader, usize, [u8; 8]), Err(io::Error), @@ -220,19 +216,13 @@ impl GzDecoder { /// Creates a new decoder from the given reader, immediately parsing the /// gzip header. 
pub fn new(mut r: R) -> GzDecoder { - let mut part = GzHeaderPartial::new(); + let mut header_parser = GzHeaderParser::new(); - let result = { - let mut reader = Buffer::new(&mut part, &mut r); - read_gz_header_part(&mut reader) - }; - - let state = match result { - Ok(()) => { - let header = part.take_header(); - GzState::Body(header) + let state = match header_parser.parse(&mut r) { + Ok(_) => GzState::Body(GzHeader::from(header_parser)), + Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { + GzState::Header(header_parser) } - Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), Err(err) => GzState::Err(err), }; @@ -280,108 +270,61 @@ impl GzDecoder { impl Read for GzDecoder { fn read(&mut self, into: &mut [u8]) -> io::Result { - let GzDecoder { - state, - reader, - multi, - } = self; - loop { - *state = match mem::replace(state, GzState::End(None)) { - GzState::Header(mut part) => { - let result = { - let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); - read_gz_header_part(&mut reader) - }; - match result { - Ok(()) => { - let header = part.take_header(); - GzState::Body(header) - } - Err(err) if io::ErrorKind::WouldBlock == err.kind() => { - *state = GzState::Header(part); - return Err(err); - } - Err(err) => return Err(err), - } + match &mut self.state { + GzState::Header(parser) => { + parser.parse(self.reader.get_mut().get_mut())?; + self.state = GzState::Body(GzHeader::from(mem::take(parser))); } GzState::Body(header) => { if into.is_empty() { - *state = GzState::Body(header); return Ok(0); } - - let n = match reader.read(into) { - Ok(n) => n, - Err(err) => { - if io::ErrorKind::WouldBlock == err.kind() { - *state = GzState::Body(header); - } - - return Err(err); + match self.reader.read(into)? 
{ + 0 => { + self.state = GzState::Finished(mem::take(header), 0, [0; 8]); } - }; - - match n { - 0 => GzState::Finished(header, 0, [0; 8]), n => { - *state = GzState::Body(header); return Ok(n); } } } - GzState::Finished(header, pos, mut buf) => { - if pos < buf.len() { - let n = match reader.get_mut().get_mut().read(&mut buf[pos..]) { - Ok(n) => { - if n == 0 { - return Err(io::ErrorKind::UnexpectedEof.into()); - } else { - n - } - } - Err(err) => { - if io::ErrorKind::WouldBlock == err.kind() { - *state = GzState::Finished(header, pos, buf); - } - - return Err(err); - } - }; - - GzState::Finished(header, pos + n, buf) + GzState::Finished(header, pos, buf) => { + if *pos < buf.len() { + *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; } else { let (crc, amt) = finish(&buf); - if crc != reader.crc().sum() || amt != reader.crc().amount() { + if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { + self.state = GzState::End(Some(mem::take(header))); return Err(corrupt()); - } else if *multi { - let is_eof = match reader.get_mut().get_mut().fill_buf() { - Ok(buf) => buf.is_empty(), - Err(err) => { - if io::ErrorKind::WouldBlock == err.kind() { - *state = GzState::Finished(header, pos, buf); - } - - return Err(err); - } - }; + } else if self.multi { + let is_eof = self + .reader + .get_mut() + .get_mut() + .fill_buf() + .map(|buf| buf.is_empty())?; if is_eof { - GzState::End(Some(header)) + self.state = GzState::End(Some(mem::take(header))); } else { - reader.reset(); - reader.get_mut().reset_data(); - GzState::Header(GzHeaderPartial::new()) + self.reader.reset(); + self.reader.get_mut().reset_data(); + self.state = GzState::Header(GzHeaderParser::new()) } } else { - GzState::End(Some(header)) + self.state = GzState::End(Some(mem::take(header))); } } } - GzState::Err(err) => return Err(err), + GzState::Err(err) => { + let result = Err(mem::replace(err, io::ErrorKind::Other.into())); + self.state = GzState::End(None); + return 
result; + } GzState::End(_) => return Ok(0), - }; + } } } } @@ -478,156 +421,3 @@ impl Read for MultiGzDecoder { self.0.read(into) } } - -#[cfg(test)] -pub mod tests { - use crate::gz::bufread::*; - use std::io; - use std::io::{Cursor, Read, Write}; - - //a cursor turning EOF into blocking errors - #[derive(Debug)] - pub struct BlockingCursor { - pub cursor: Cursor>, - } - - impl BlockingCursor { - pub fn new() -> BlockingCursor { - BlockingCursor { - cursor: Cursor::new(Vec::new()), - } - } - - pub fn set_position(&mut self, pos: u64) { - self.cursor.set_position(pos) - } - - pub fn position(&mut self) -> u64 { - self.cursor.position() - } - } - - impl Write for BlockingCursor { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.cursor.write(buf) - } - fn flush(&mut self) -> io::Result<()> { - self.cursor.flush() - } - } - - impl Read for BlockingCursor { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - //use the cursor, except it turns eof into blocking error - let r = self.cursor.read(buf); - match r { - Err(ref err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(io::ErrorKind::WouldBlock.into()); - } - } - Ok(0) => { - //regular EOF turned into blocking error - return Err(io::ErrorKind::WouldBlock.into()); - } - Ok(_n) => {} - } - r - } - } - #[test] - // test function read_and_forget of Buffer - fn buffer_read_and_forget() { - // this is unused except for the buffering - let mut part = GzHeaderPartial::new(); - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - let mut out = Vec::with_capacity(7); - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // First read : successful for one byte - let mut reader = Buffer::new(&mut part, &mut r); - out.resize(1, 0); - match reader.read_and_forget(&mut out) { - Ok(1) => {} - _ => { - panic!("Unexpected result for read_and_forget with data"); 
- } - } - - // Second read : incomplete for 7 bytes (we have only 2) - out.resize(7, 0); - match reader.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with incomplete"); - } - } - - // 3 more data bytes have arrived - let pos = r.position(); - let data2 = vec![4, 5, 6]; - match r.write_all(&data2) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos); - - // Third read : still incomplete for 7 bytes (we have 5) - let mut reader2 = Buffer::new(&mut part, &mut r); - match reader2.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with more incomplete"); - } - } - - // 3 more data bytes have arrived again - let pos2 = r.position(); - let data3 = vec![7, 8, 9]; - match r.write_all(&data3) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos2); - - // Fourth read : now successful for 7 bytes - let mut reader3 = Buffer::new(&mut part, &mut r); - match reader3.read_and_forget(&mut out) { - Ok(7) => { - assert_eq!(out[0], 2); - assert_eq!(out[6], 8); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - - // Fifth read : successful for one more byte - out.resize(1, 0); - match reader3.read_and_forget(&mut out) { - Ok(1) => { - assert_eq!(out[0], 9); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - } -} diff --git a/src/gz/mod.rs b/src/gz/mod.rs index 26152c16..95ed8375 100644 --- a/src/gz/mod.rs +++ b/src/gz/mod.rs @@ -1,5 +1,5 @@ use std::ffi::CString; -use std::io::{self, prelude::*}; +use std::io::{BufRead, Error, ErrorKind, Read, Result, Write}; use std::time; use crate::bufreader::BufReader; @@ -9,11 +9,16 @@ pub static FHCRC: u8 = 1 << 1; pub static FEXTRA: u8 = 1 << 2; pub static FNAME: 
u8 = 1 << 3; pub static FCOMMENT: u8 = 1 << 4; +pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7; pub mod bufread; pub mod read; pub mod write; +// The maximum length of the header filename and comment fields. More than +// enough for these fields in reasonable use, but prevents possible attacks. +const MAX_HEADER_BUF: usize = 65535; + /// A structure representing the header of a gzip stream. /// /// The header can contain metadata about the file that was compressed, if @@ -82,151 +87,201 @@ impl GzHeader { } } -#[derive(Debug)] -pub enum GzHeaderParsingState { - Start, - Xlen, - Extra, - Filename, - Comment, - Crc, +#[derive(Debug, Default)] +pub enum GzHeaderState { + Start(u8, [u8; 10]), + Xlen(Option>, u8, [u8; 2]), + Extra(Option>, u16), + Filename(Option>), + Comment(Option>), + Crc(Option>, u8, [u8; 2]), + #[default] + Complete, } -#[derive(Debug)] -pub struct GzHeaderPartial { - buf: Vec, - state: GzHeaderParsingState, - flg: u8, - xlen: u16, - crc: Crc, +#[derive(Debug, Default)] +pub struct GzHeaderParser { + state: GzHeaderState, + flags: u8, header: GzHeader, } -impl GzHeaderPartial { - fn new() -> GzHeaderPartial { - GzHeaderPartial { - buf: Vec::with_capacity(10), // minimum header length - state: GzHeaderParsingState::Start, - flg: 0, - xlen: 0, - crc: Crc::new(), - header: GzHeader { - extra: None, - filename: None, - comment: None, - operating_system: 0, - mtime: 0, - }, +impl GzHeaderParser { + fn new() -> Self { + GzHeaderParser { + state: GzHeaderState::Start(0, [0; 10]), + flags: 0, + header: GzHeader::default(), } } - pub fn take_header(self) -> GzHeader { - self.header - } -} - -fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { - loop { - match r.part.state { - GzHeaderParsingState::Start => { - let mut header = [0; 10]; - r.read_and_forget(&mut header)?; - - if header[0] != 0x1f || header[1] != 0x8b { - return Err(bad_header()); - } - if header[2] != 8 { - return Err(bad_header()); + fn parse<'a, R: 
Read>(&mut self, r: &'a mut R) -> Result<()> { + loop { + match &mut self.state { + GzHeaderState::Start(count, buffer) => { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + // Gzip identification bytes + if buffer[0] != 0x1f || buffer[1] != 0x8b { + return Err(bad_header()); + } + // Gzip compression method (8 = deflate) + if buffer[2] != 8 { + return Err(bad_header()); + } + self.flags = buffer[3]; + // RFC1952: "must give an error indication if any reserved bit is non-zero" + if self.flags & FRESERVED != 0 { + return Err(bad_header()); + } + self.header.mtime = ((buffer[4] as u32) << 0) + | ((buffer[5] as u32) << 8) + | ((buffer[6] as u32) << 16) + | ((buffer[7] as u32) << 24); + let _xfl = buffer[8]; + self.header.operating_system = buffer[9]; + let crc = if self.flags & FHCRC != 0 { + let mut crc = Box::new(Crc::new()); + crc.update(buffer); + Some(crc) + } else { + None + }; + self.state = GzHeaderState::Xlen(crc, 0, [0; 2]); } - - r.part.flg = header[3]; - r.part.header.mtime = ((header[4] as u32) << 0) - | ((header[5] as u32) << 8) - | ((header[6] as u32) << 16) - | ((header[7] as u32) << 24); - let _xfl = header[8]; - r.part.header.operating_system = header[9]; - r.part.state = GzHeaderParsingState::Xlen; - } - GzHeaderParsingState::Xlen => { - if r.part.flg & FEXTRA != 0 { - r.part.xlen = read_le_u16(r)?; + GzHeaderState::Xlen(crc, count, buffer) => { + if self.flags & FEXTRA != 0 { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? 
as u8; + } + if let Some(crc) = crc { + crc.update(buffer); + } + let xlen = parse_le_u16(&buffer); + self.header.extra = Some(vec![0; xlen as usize]); + self.state = GzHeaderState::Extra(crc.take(), 0); + } else { + self.state = GzHeaderState::Filename(crc.take()); + } } - r.part.state = GzHeaderParsingState::Extra; - } - GzHeaderParsingState::Extra => { - if r.part.flg & FEXTRA != 0 { - let mut extra = vec![0; r.part.xlen as usize]; - r.read_and_forget(&mut extra)?; - r.part.header.extra = Some(extra); + GzHeaderState::Extra(crc, count) => { + debug_assert!(self.header.extra.is_some()); + let extra = self.header.extra.as_mut().unwrap(); + while (*count as usize) < extra.len() { + *count += read_into(r, &mut extra[*count as usize..])? as u16; + } + if let Some(crc) = crc { + crc.update(extra); + } + self.state = GzHeaderState::Filename(crc.take()); } - r.part.state = GzHeaderParsingState::Filename; - } - GzHeaderParsingState::Filename => { - if r.part.flg & FNAME != 0 { - if r.part.header.filename.is_none() { - r.part.header.filename = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; + GzHeaderState::Filename(crc) => { + if self.flags & FNAME != 0 { + let filename = self.header.filename.get_or_insert_with(Vec::new); + read_to_nul(r, filename)?; + if let Some(crc) = crc { + crc.update(filename); + crc.update(b"\0"); } } + self.state = GzHeaderState::Comment(crc.take()); } - r.part.state = GzHeaderParsingState::Comment; - } - GzHeaderParsingState::Comment => { - if r.part.flg & FCOMMENT != 0 { - if r.part.header.comment.is_none() { - r.part.header.comment = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; + GzHeaderState::Comment(crc) => { + if self.flags & FCOMMENT != 0 { + let comment = self.header.comment.get_or_insert_with(Vec::new); + read_to_nul(r, comment)?; + if let Some(crc) = crc { + crc.update(comment); + crc.update(b"\0"); } } + self.state = 
GzHeaderState::Crc(crc.take(), 0, [0; 2]); } - r.part.state = GzHeaderParsingState::Crc; - } - GzHeaderParsingState::Crc => { - if r.part.flg & FHCRC != 0 { - let stored_crc = read_le_u16(r)?; - let calced_crc = r.part.crc.sum() as u16; - if stored_crc != calced_crc { - return Err(corrupt()); + GzHeaderState::Crc(crc, count, buffer) => { + if let Some(crc) = crc { + debug_assert!(self.flags & FHCRC != 0); + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + let stored_crc = parse_le_u16(&buffer); + let calced_crc = crc.sum() as u16; + if stored_crc != calced_crc { + return Err(corrupt()); + } } + self.state = GzHeaderState::Complete; + } + GzHeaderState::Complete => { + return Ok(()); } - return Ok(()); } } } + + fn header(&self) -> Option<&GzHeader> { + match self.state { + GzHeaderState::Complete => Some(&self.header), + _ => None, + } + } +} + +impl From for GzHeader { + fn from(parser: GzHeaderParser) -> Self { + debug_assert!(matches!(parser.state, GzHeaderState::Complete)); + parser.header + } } -fn read_gz_header(r: &mut R) -> io::Result { - let mut part = GzHeaderPartial::new(); +// Attempt to fill the `buffer` from `r`. Return the number of bytes read. +// Return an error if EOF is read before the buffer is full. This differs +// from `read` in that Ok(0) means that more data may be available. +fn read_into(r: &mut R, buffer: &mut [u8]) -> Result { + debug_assert!(!buffer.is_empty()); + match r.read(buffer) { + Ok(0) => Err(ErrorKind::UnexpectedEof.into()), + Ok(n) => Ok(n), + Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0), + Err(e) => Err(e), + } +} - let result = { - let mut reader = Buffer::new(&mut part, r); - read_gz_header_part(&mut reader) - }; - result.map(|()| part.take_header()) +// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`. 
+fn read_to_nul(r: &mut R, buffer: &mut Vec) -> Result<()> { + let mut bytes = r.bytes(); + loop { + match bytes.next().transpose()? { + Some(byte) if byte == 0 => { + return Ok(()); + } + Some(_) if buffer.len() == MAX_HEADER_BUF => { + return Err(Error::new( + ErrorKind::InvalidInput, + "gzip header field too long", + )); + } + Some(byte) => { + buffer.push(byte); + } + None => { + return Err(ErrorKind::UnexpectedEof.into()); + } + } + } } -fn read_le_u16(r: &mut Buffer) -> io::Result { - let mut b = [0; 2]; - r.read_and_forget(&mut b)?; - Ok((b[0] as u16) | ((b[1] as u16) << 8)) +fn parse_le_u16(buffer: &[u8; 2]) -> u16 { + (buffer[0] as u16) | ((buffer[1] as u16) << 8) } -fn bad_header() -> io::Error { - io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header") +fn bad_header() -> Error { + Error::new(ErrorKind::InvalidInput, "invalid gzip header") } -fn corrupt() -> io::Error { - io::Error::new( - io::ErrorKind::InvalidInput, +fn corrupt() -> Error { + Error::new( + ErrorKind::InvalidInput, "corrupt gzip stream does not have a matching checksum", ) } @@ -398,77 +453,6 @@ impl GzBuilder { } } -/// A small adapter which reads data originally from `buf` and then reads all -/// further data from `reader`. This will also buffer all data read from -/// `reader` into `buf` for reuse on a further call. 
-struct Buffer<'a, T: 'a> { - part: &'a mut GzHeaderPartial, - buf_cur: usize, - buf_max: usize, - reader: &'a mut T, -} - -impl<'a, T> Buffer<'a, T> { - fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { - Buffer { - reader, - buf_cur: 0, - buf_max: part.buf.len(), - part, - } - } -} - -impl<'a, T: Read> Read for Buffer<'a, T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let mut bufref = match self.part.state { - GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), - GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), - _ => None, - }; - if let Some(ref mut b) = bufref { - // we have a direct reference to a buffer where to write - let len = self.reader.read(buf)?; - if len > 0 && buf[len - 1] == 0 { - // we do not append the final 0 - b.extend_from_slice(&buf[..len - 1]); - } else { - b.extend_from_slice(&buf[..len]); - } - self.part.crc.update(&buf[..len]); - Ok(len) - } else if self.buf_cur == self.buf_max { - // we read new bytes and also save them in self.part.buf - let len = self.reader.read(buf)?; - self.part.buf.extend_from_slice(&buf[..len]); - self.part.crc.update(&buf[..len]); - Ok(len) - } else { - // we first read the previously saved bytes - let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; - self.buf_cur += len; - Ok(len) - } - } -} - -impl<'a, T> Buffer<'a, T> -where - T: std::io::Read, -{ - // If we manage to read all the bytes, we reset the buffer - fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result { - self.read_exact(buf)?; - // we managed to read the whole buf - // we will no longer need the previously saved bytes in self.part.buf - let rlen = buf.len(); - self.part.buf.truncate(0); - self.buf_cur = 0; - self.buf_max = 0; - Ok(rlen) - } -} - #[cfg(test)] mod tests { use std::io::prelude::*; diff --git a/src/gz/write.rs b/src/gz/write.rs index 5336a17e..339914d0 100644 --- a/src/gz/write.rs +++ b/src/gz/write.rs @@ -2,7 +2,7 @@ use std::cmp; use 
std::io; use std::io::prelude::*; -use super::{corrupt, read_gz_header, GzBuilder, GzHeader}; +use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser}; use crate::crc::{Crc, CrcWriter}; use crate::zio; use crate::{Compress, Compression, Decompress, Status}; @@ -202,8 +202,7 @@ impl Drop for GzEncoder { pub struct GzDecoder { inner: zio::Writer, Decompress>, crc_bytes: Vec, - header: Option, - header_buf: Vec, + header_parser: GzHeaderParser, } const CRC_BYTES_LEN: usize = 8; @@ -217,14 +216,13 @@ impl GzDecoder { GzDecoder { inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), - header: None, - header_buf: Vec::new(), + header_parser: GzHeaderParser::new(), } } /// Returns the header associated with this stream. pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() + self.header_parser.header() } /// Acquires a reference to the underlying writer. @@ -305,47 +303,24 @@ impl GzDecoder { } } -struct Counter { - inner: T, - pos: usize, -} - -impl Read for Counter { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let pos = self.inner.read(buf)?; - self.pos += pos; - Ok(pos) - } -} - impl Write for GzDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - if self.header.is_none() { - // trying to avoid buffer usage - let (res, pos) = { - let mut counter = Counter { - inner: self.header_buf.chain(buf), - pos: 0, - }; - let res = read_gz_header(&mut counter); - (res, counter.pos) - }; - - match res { + fn write(&mut self, mut buf: &[u8]) -> io::Result { + let buflen = buf.len(); + if self.header().is_none() { + match self.header_parser.parse(&mut buf) { Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - // not enough data for header, save to the buffer - self.header_buf.extend(buf); - Ok(buf.len()) + // all data read but header still not complete + Ok(buflen) } else { Err(err) } } - Ok(header) => { - self.header = Some(header); - let pos = pos - self.header_buf.len(); - 
self.header_buf.truncate(0); - Ok(pos) + Ok(_) => { + debug_assert!(self.header().is_some()); + // buf now contains the unread part of the original buf + let n = buflen - buf.len(); + Ok(n) } } } else { @@ -522,6 +497,56 @@ mod tests { assert_eq!(return_string, STR); } + #[test] + fn decode_writer_partial_header_filename() { + let filename = "test.txt"; + let mut e = GzBuilder::new() + .filename(filename) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().filename().unwrap(), + filename.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_partial_header_comment() { + let comment = "test comment"; + let mut e = GzBuilder::new() + .comment(comment) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().comment().unwrap(), + comment.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + #[test] fn decode_writer_exact_header() { let mut e = GzEncoder::new(Vec::new(), Compression::default());