diff --git a/Cargo.toml b/Cargo.toml index 97085c13..e348cf42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ include = [ ] [dependencies] -inflate = "0.4.2" +miniz_oxide = "0.3.5" deflate = { version = "0.8.2", optional = true } bitflags = "1.0" crc32fast = "1.2.0" diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs index c46b31af..569cf39d 100644 --- a/src/decoder/mod.rs +++ b/src/decoder/mod.rs @@ -1,4 +1,5 @@ mod stream; +mod zlib; use self::stream::{get_info, CHUNCK_BUFFER_SIZE}; pub use self::stream::{Decoded, DecodingError, StreamingDecoder}; @@ -170,21 +171,22 @@ impl ReadDecoder { while !self.at_eof { let buf = self.reader.fill_buf()?; if buf.is_empty() { - return Err(DecodingError::Format("unexpected EOF".into())); + return Err(DecodingError::Format("unexpected EOF after image".into())); } let (consumed, event) = self.decoder.update(buf, &mut vec![])?; self.reader.consume(consumed); match event { Decoded::Nothing => (), Decoded::ImageEnd => self.at_eof = true, - Decoded::ChunkComplete(_, _) => return Ok(()), - Decoded::ImageData => { /*ignore more data*/ } + // ignore more data + Decoded::ChunkComplete(_, _) | Decoded::ChunkBegin(_, _) | Decoded::ImageData => {} + Decoded::ImageDataFlushed => return Ok(()), Decoded::PartialChunk(_) => {} new => unreachable!("{:?}", new), } } - Err(DecodingError::Format("unexpected EOF".into())) + Err(DecodingError::Format("unexpected EOF after image".into())) } fn info(&self) -> Option<&Info> { @@ -227,6 +229,7 @@ struct SubframeInfo { width: u32, rowlen: usize, interlace: InterlaceIter, + consumed_and_flushed: bool, } #[derive(Clone)] @@ -409,7 +412,9 @@ impl Reader { } } // Advance over the rest of data for this (sub-)frame. - self.decoder.finished_decoding()?; + if !self.subframe.consumed_and_flushed { + self.decoder.finished_decoding()?; + } // Advance our state to expect the next frame. 
self.finished_frame(); Ok(()) @@ -657,6 +662,12 @@ impl Reader { interlace: passdata, })); } else { + if self.subframe.consumed_and_flushed { + return Err(DecodingError::Format( + format!("not enough data for image").into(), + )); + } + // Clear the current buffer before appending more data. if self.scan_start > 0 { self.current.drain(..self.scan_start).for_each(drop); @@ -666,6 +677,9 @@ impl Reader { let val = self.decoder.decode_next(&mut self.current)?; match val { Some(Decoded::ImageData) => {} + Some(Decoded::ImageDataFlushed) => { + self.subframe.consumed_and_flushed = true; + } None => { if !self.current.is_empty() { return Err(DecodingError::Format("file truncated".into())); @@ -686,6 +700,7 @@ impl SubframeInfo { width: 0, rowlen: 0, interlace: InterlaceIter::None(0..0), + consumed_and_flushed: false, } } @@ -708,6 +723,7 @@ impl SubframeInfo { width, rowlen: info.raw_row_length_from_width(width), interlace, + consumed_and_flushed: false, } } } diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs index 76442746..b3ec1ff8 100644 --- a/src/decoder/stream.rs +++ b/src/decoder/stream.rs @@ -1,5 +1,4 @@ extern crate crc32fast; -extern crate inflate; use std::borrow::Cow; use std::cmp::min; @@ -11,7 +10,7 @@ use std::io; use crc32fast::Hasher as Crc32; -use self::inflate::InflateStream; +use super::zlib::ZlibStream; use crate::chunk::{self, ChunkType, IDAT, IEND, IHDR}; use crate::common::{ AnimationControl, BitDepth, BlendOp, ColorType, DisposeOp, FrameControl, Info, PixelDimensions, @@ -28,14 +27,6 @@ pub const CHUNCK_BUFFER_SIZE: usize = 32 * 1024; /// be used to detect that build. const CHECKSUM_DISABLED: bool = cfg!(fuzzing); -fn zlib_stream() -> InflateStream { - if CHECKSUM_DISABLED { - InflateStream::from_zlib_no_checksum() - } else { - InflateStream::from_zlib() - } -} - #[derive(Debug)] enum U32Value { // CHUNKS @@ -69,6 +60,10 @@ pub enum Decoded { FrameControl(FrameControl), /// Decoded raw image data. 
ImageData, + /// The last of a consecutive chunk of IDAT was done. + /// This is distinct from ChunkComplete which only marks that some IDAT chunk was completed but + /// not that no additional IDAT chunk follows. + ImageDataFlushed, PartialChunk(ChunkType), ImageEnd, } @@ -141,7 +136,7 @@ pub struct StreamingDecoder { state: Option, current_chunk: ChunkState, /// The inflater state handling consecutive `IDAT` and `fdAT` chunks. - inflater: InflateStream, + inflater: ZlibStream, /// The complete image info read from all prior chunks. info: Option, /// The animation chunk sequence number. @@ -152,6 +147,10 @@ pub struct StreamingDecoder { } struct ChunkState { + /// The type of the current chunk. + /// Relevant for `IDAT` and `fdAT` which aggregate consecutive chunks of their own type. + type_: ChunkType, + /// Partial crc until now. crc: Crc32, @@ -170,7 +169,7 @@ impl StreamingDecoder { StreamingDecoder { state: Some(State::Signature(0, [0; 7])), current_chunk: ChunkState::default(), - inflater: zlib_stream(), + inflater: ZlibStream::new(), info: None, current_seq_no: None, apng_seq_handled: false, @@ -184,7 +183,7 @@ impl StreamingDecoder { self.current_chunk.crc = Crc32::new(); self.current_chunk.remaining = 0; self.current_chunk.raw_bytes.clear(); - self.inflater = zlib_stream(); + self.inflater = ZlibStream::new(); self.info = None; self.current_seq_no = None; self.apng_seq_handled = false; @@ -269,6 +268,20 @@ impl StreamingDecoder { (val >> 8) as u8, val as u8, ]; + if type_str != self.current_chunk.type_ + && (self.current_chunk.type_ == IDAT + || self.current_chunk.type_ == chunk::fdAT) + { + self.current_chunk.type_ = type_str; + self.inflater.finish_compressed_chunks(image_data)?; + self.inflater.reset(); + return goto!( + 0, + U32Byte3(Type(length), val & !0xff), + emit Decoded::ImageDataFlushed + ); + } + self.current_chunk.type_ = type_str; self.current_chunk.crc.reset(); self.current_chunk.crc.update(&type_str); self.current_chunk.remaining = length; 
@@ -369,6 +382,7 @@ impl StreamingDecoder { crc, remaining, raw_bytes, + type_: _, } = &mut self.current_chunk; let buf_avail = raw_bytes.capacity() - raw_bytes.len(); let bytes_avail = min(buf.len(), buf_avail); @@ -393,10 +407,9 @@ impl StreamingDecoder { DecodeData(type_str, mut n) => { let chunk_len = self.current_chunk.raw_bytes.len(); let chunk_data = &self.current_chunk.raw_bytes[n..]; - let (c, data) = self.inflater.update(chunk_data)?; - image_data.extend_from_slice(data); + let c = self.inflater.decompress(chunk_data, image_data)?; n += c; - if n == chunk_len && data.is_empty() && c == 0 { + if n == chunk_len && c == 0 { goto!( 0, ReadChunk(type_str, true), @@ -477,7 +490,7 @@ impl StreamingDecoder { } 0 }); - self.inflater = zlib_stream(); + self.inflater = ZlibStream::new(); let fc = FrameControl { sequence_number: next_seq_no, width: buf.read_be()?, @@ -679,9 +692,16 @@ impl Info { } } +impl Default for StreamingDecoder { + fn default() -> Self { + Self::new() + } +} + impl Default for ChunkState { fn default() -> Self { ChunkState { + type_: [0; 4], crc: Crc32::new(), remaining: 0, raw_bytes: Vec::with_capacity(CHUNCK_BUFFER_SIZE), diff --git a/src/decoder/zlib.rs b/src/decoder/zlib.rs new file mode 100644 index 00000000..b8662f16 --- /dev/null +++ b/src/decoder/zlib.rs @@ -0,0 +1,193 @@ +use super::{DecodingError, CHUNCK_BUFFER_SIZE}; +use std::io; + +use miniz_oxide::inflate::core::{decompress, inflate_flags, DecompressorOxide}; +use miniz_oxide::inflate::TINFLStatus; + +/// Ergonomics wrapper around `miniz_oxide::inflate::stream` for zlib compressed data. +pub(super) struct ZlibStream { + /// Current decoding state. + state: Box, + /// If there has been a call to decompress already. + started: bool, + /// A buffer of compressed data. + /// We use this for a progress guarantee. The data in the input stream is chunked as given by + /// the underlying stream buffer. We will not read any more data until the current buffer has + /// been fully consumed. 
The zlib decompression cannot fully consume all the data when it is + /// in the middle of the stream, it will treat full symbols and maybe the last bytes need to be + /// treated in a special way. The exact reason isn't as important but the interface does not + /// promise us this. Now, the complication is that the _current_ chunking information of PNG + /// alone is not enough to determine this as indeed the compressed stream is the concatenation + /// of all consecutive `IDAT`/`fdAT` chunks. We would need to inspect the next chunk header. + /// + /// Thus, there needs to be a buffer that allows fully clearing a chunk so that the next chunk + /// type can be inspected. + in_buffer: Vec, + /// The logical start of the `in_buffer`. + in_pos: usize, + /// Remaining buffered decoded bytes. + /// The decoder sometimes wants to inspect some already finished bytes for further decoding. So we + /// keep a total of 32KB of decoded data available as long as more data may be appended. + out_buffer: Vec, + /// The cursor position in the output stream as a buffer index. + out_pos: usize, +} + +impl ZlibStream { + pub(crate) fn new() -> Self { + ZlibStream { + state: Box::default(), + started: false, + in_buffer: Vec::with_capacity(CHUNCK_BUFFER_SIZE), + in_pos: 0, + out_buffer: vec![0; 2 * CHUNCK_BUFFER_SIZE], + out_pos: 0, + } + } + + pub(crate) fn reset(&mut self) { + self.started = false; + self.in_buffer.clear(); + self.out_buffer.clear(); + self.out_pos = 0; + *self.state = DecompressorOxide::default(); + } + + /// Fill the decoded buffer as far as possible from `data`. + /// On success returns the number of consumed input bytes.
+ pub(crate) fn decompress( + &mut self, + data: &[u8], + image_data: &mut Vec, + ) -> Result { + const BASE_FLAGS: u32 = inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER + | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF + | inflate_flags::TINFL_FLAG_HAS_MORE_INPUT; + + self.prepare_vec_for_appending(); + + let (status, mut in_consumed, out_consumed) = { + let mut cursor = io::Cursor::new(self.out_buffer.as_mut_slice()); + cursor.set_position(self.out_pos as u64); + let in_data = if self.in_buffer.is_empty() { + data + } else { + &self.in_buffer[self.in_pos..] + }; + decompress(&mut self.state, in_data, &mut cursor, BASE_FLAGS) + }; + + if !self.in_buffer.is_empty() { + self.in_pos += in_consumed; + } + + if self.in_buffer.len() == self.in_pos { + self.in_buffer.clear(); + self.in_pos = 0; + } + + if in_consumed == 0 { + self.in_buffer.extend_from_slice(data); + in_consumed = data.len(); + } + + self.started = true; + self.out_pos += out_consumed; + self.transfer_finished_data(image_data); + + match status { + TINFLStatus::Done | TINFLStatus::HasMoreOutput | TINFLStatus::NeedsMoreInput => { + Ok(in_consumed) + } + _err => Err(DecodingError::CorruptFlateStream), + } + } + + /// Called after all consecutive IDAT chunks were handled. + /// + /// The compressed stream can be split on arbitrary byte boundaries. This enables some cleanup + /// within the decompressor and flushing additional data which may have been kept back in case + /// more data were passed to it. 
+ pub(crate) fn finish_compressed_chunks( + &mut self, + image_data: &mut Vec, + ) -> Result<(), DecodingError> { + const BASE_FLAGS: u32 = inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER + | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + if !self.started { + return Ok(()); + } + + let tail = self.in_buffer.split_off(0); + let tail = &tail[self.in_pos..]; + + let mut start = 0; + loop { + self.prepare_vec_for_appending(); + + let (status, in_consumed, out_consumed) = { + // TODO: we may be able to avoid the indirection through the buffer here. + // First append all buffered data and then create a cursor on the image_data + // instead. + let mut cursor = io::Cursor::new(self.out_buffer.as_mut_slice()); + cursor.set_position(self.out_pos as u64); + decompress(&mut self.state, &tail[start..], &mut cursor, BASE_FLAGS) + }; + + start += in_consumed; + self.out_pos += out_consumed; + + match status { + TINFLStatus::Done => { + self.out_buffer.truncate(self.out_pos as usize); + image_data.append(&mut self.out_buffer); + return Ok(()); + } + TINFLStatus::HasMoreOutput => { + let transferred = self.transfer_finished_data(image_data); + assert!( + transferred > 0 || in_consumed > 0 || out_consumed > 0, + "No more forward progress made in stream decoding." + ); + } + _err => { + return Err(DecodingError::CorruptFlateStream); + } + } + } + } + + /// Resize the vector to allow allocation of more data. + fn prepare_vec_for_appending(&mut self) { + if self.out_buffer.len().saturating_sub(self.out_pos) >= CHUNCK_BUFFER_SIZE { + return; + } + + let buffered_len = self.decoding_size(self.out_buffer.len()); + debug_assert!(self.out_buffer.len() <= buffered_len); + self.out_buffer.resize(buffered_len, 0u8); + } + + fn decoding_size(&self, len: usize) -> usize { + // Allocate one more chunk size than currently or double the length while ensuring that the + // allocation is valid and that any cursor within it will be valid. 
+ len + // This keeps the buffer size a power-of-two, required by miniz_oxide. + .saturating_add(CHUNCK_BUFFER_SIZE.max(len)) + // Ensure all buffer indices are valid cursor positions. + // Note: both cut off and zero extension give correct results. + .min(u64::max_value() as usize) + // Ensure the allocation request is valid. + // TODO: maximum allocation limits? + .min(isize::max_value() as usize) + } + + fn transfer_finished_data(&mut self, image_data: &mut Vec) -> usize { + let safe = self.out_pos.saturating_sub(CHUNCK_BUFFER_SIZE); + // TODO: allocation limits. + image_data.extend(self.out_buffer.drain(..safe)); + self.out_pos -= safe; + safe + } +} diff --git a/tests/bugfixes/x_consecutive_idat.png b/tests/bugfixes/x_consecutive_idat.png new file mode 100644 index 00000000..dc5794d3 Binary files /dev/null and b/tests/bugfixes/x_consecutive_idat.png differ diff --git a/tests/results.txt b/tests/results.txt index 6a1a33ba..291dcb80 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -183,3 +183,4 @@ tests/bugfixes/x_ihdr_missing.png: Expected failure tests/bugfixes/x_unexpected_eof.png: Expected failure tests/pngsuite/x_interlaced_chck.png: Expected failure tests/bugfixes/x_issue#214.png: Expected failure +tests/bugfixes/x_consecutive_idat.png: Expected failure diff --git a/tests/results_identity.txt b/tests/results_identity.txt index 6288d2ed..32dab5d8 100644 --- a/tests/results_identity.txt +++ b/tests/results_identity.txt @@ -183,3 +183,4 @@ tests/bugfixes/x_ihdr_missing.png: Expected failure tests/bugfixes/x_unexpected_eof.png: Expected failure tests/pngsuite/x_interlaced_chck.png: Expected failure tests/bugfixes/x_issue#214.png: Expected failure +tests/bugfixes/x_consecutive_idat.png: Expected failure