From 4742f16e834445a682a0a4db62600d275a457390 Mon Sep 17 00:00:00 2001 From: Reizner Evgeniy Date: Tue, 21 Feb 2017 21:44:32 +0200 Subject: [PATCH] Fixed few panics that can be triggered by an invalid input. Added 'Stream::is_letter_raw'. Maximum numeric exponent is 100 now. Removed 'Error::ElementWithoutTagName' Removed 'Stream::read_to_trimmed'. --- CHANGELOG.md | 9 ++++ README.md | 2 + examples/extract_paths.rs | 4 ++ src/error.rs | 4 -- src/length.rs | 3 +- src/stream.rs | 75 ++++++++++++++++--------------- src/svg.rs | 93 +++++++++++++++++++++++++++++++-------- tests/stream.rs | 1 + tests/svg.rs | 67 +++++++++++++++++++++++++--- 9 files changed, 192 insertions(+), 66 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92f058d..1808e8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ### Added - `Error::Utf8Error` instead of panic during `str::from_utf8` unwrap. +- `Stream::is_letter_raw`. + +### Changed +- Maximum numeric exponent is 100 now. It's needed to prevent multiply overflow. ### Removed - The `u8_to_str` macro. +- `Error::ElementWithoutTagName`. `Error::InvalidSvgToken` will be emitted instead. +- `Stream::read_to_trimmed`. + +### Fixed +- Few panics that can be triggered by an invalid input. ## [0.2.1] - 2017-02-01 ### Fixed diff --git a/README.md b/README.md index 306338f..e001659 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,8 @@ See the documentation for details. ### Usage +Dependency: Rust >= 1.13 + Add this to your `Cargo.toml`: ```toml diff --git a/examples/extract_paths.rs b/examples/extract_paths.rs index e00e32e..2a176ed 100644 --- a/examples/extract_paths.rs +++ b/examples/extract_paths.rs @@ -2,6 +2,7 @@ extern crate svgparser; use std::env; use std::fs; +use std::str; use std::io::Read; use svgparser::svg; @@ -20,6 +21,9 @@ fn main() { let mut v = Vec::new(); file.read_to_end(&mut v).unwrap(); + // Check that input file is a valid UTF-8 text. + str::from_utf8(&v).unwrap(); + // Begin parsing. let mut p = svg::Tokenizer::new(&v); // Get next token. diff --git a/src/error.rs b/src/error.rs index 06d42da..4168d5c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -72,8 +72,6 @@ pub enum Error { /// Absolute stream position. pos: ErrorPos, }, - /// An SVG element must contain a tag name. - ElementWithoutTagName(ErrorPos), /// UTF-8 processing error. Utf8Error(str::Utf8Error), } @@ -102,8 +100,6 @@ impl fmt::Display for Error { Error::InvalidAdvance{ ref expected, ref total, ref pos } => write!(f, "Attempt to advance to the pos {} from {}, but total len is {}", expected, pos, total), - Error::ElementWithoutTagName(ref pos) => - write!(f, "An element without a tag name at {}", pos), Error::Utf8Error(e) => write!(f, "{}", e), } diff --git a/src/length.rs b/src/length.rs index 72b5aab..3a506e0 100644 --- a/src/length.rs +++ b/src/length.rs @@ -5,8 +5,7 @@ /// List of all SVG length units. #[derive(Clone,Copy,Debug,PartialEq)] #[allow(missing_docs)] -pub enum LengthUnit -{ +pub enum LengthUnit { None, Em, Ex, diff --git a/src/stream.rs b/src/stream.rs index 7fd1baa..93cb3ea 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -50,6 +50,14 @@ fn is_space(c: u8) -> bool { } } +#[inline] +fn is_letter(c: u8) -> bool { + match c { + b'A'...b'Z' | b'a'...b'z' => true, + _ => false, + } +} + // Table giving binary powers of 10. // Entry is 10^2^i. Used to convert decimal // exponents into floating-point numbers. @@ -309,7 +317,7 @@ impl<'a> Stream<'a> { self.pos += n; } - /// Checks that char at the current position is (white)space. + /// Checks that the current char is (white)space. /// /// Accepted chars: ' ', '\n', '\r', '\t'. /// @@ -375,6 +383,27 @@ impl<'a> Stream<'a> { } } + /// Checks that the current char is a letter. + #[inline] + pub fn is_letter_raw(&self) -> bool { + is_letter(self.curr_char_raw()) + } + + /// Checks that the current char is a valid part of an ident token. + #[inline] + pub fn is_ident_raw(&self) -> bool { + let c = self.curr_char_raw(); + match c { + b'0'...b'9' + | b'A'...b'Z' + | b'a'...b'z' + | b'-' + | b'_' + | b':' => true, + _ => false, + } + } + #[inline] fn get_char_raw(&self, pos: usize) -> u8 { // TODO: maybe via unsafe to avoid bound checking @@ -565,40 +594,6 @@ impl<'a> Stream<'a> { Ok(s) } - /// Returns reference to the trimmed data until selected char and - /// advance stream by the data length. - /// - /// Same as `read_to()`, but also trim spaces from the both sides. - /// - /// # Errors - /// - /// Returns `Error::UnexpectedEndOfStream` if no such char. - /// - /// # Examples - /// - /// ``` - /// use svgparser::Stream; - /// - /// let mut s = Stream::new(b" Some text ."); - /// assert_eq!(s.read_to_trimmed(b'.').unwrap(), b"Some text"); - /// - /// let mut s = Stream::new(b"Some text."); - /// assert_eq!(s.read_to_trimmed(b'.').unwrap(), b"Some text"); - /// ``` - #[inline] - pub fn read_to_trimmed(&mut self, c: u8) -> Result<&'a [u8], Error> { - self.skip_spaces(); - - let mut s = self.read_to(c)?; - - // trim spaces at the end of the string - if let Some(p) = s.iter().rposition(|c| !is_space(*c)) { - s = &s[0..(p + 1)]; - } - - Ok(s) - } - /// Returns next data of stream with selected length. /// /// # Examples @@ -873,10 +868,18 @@ impl<'a> Stream<'a> { self.advance(1)?; } - while !self.at_end() && is_digit(self.curr_char()?) { + while !self.at_end() && is_digit(self.curr_char()?) && exp < 100 { + // TODO: use checked multiply exp = exp * 10 + (self.curr_char_raw() - b'0') as i32; self.advance_raw(1); } + + // check that 'exp' is less than 100 to prevent multiply overflow + if exp >= 100 { + // back to start + self.pos = start; + return Err(Error::InvalidNumber(self.gen_error_pos())); + } } } diff --git a/src/svg.rs b/src/svg.rs index 006e984..6d0cb1b 100644 --- a/src/svg.rs +++ b/src/svg.rs @@ -217,21 +217,39 @@ impl<'a> Tokenizer<'a> { } fn parse_declaration(&mut self) -> Result, Error> { - let l = self.stream.len_to(b'>')?; - self.stream.advance(6)?; // '' + debug_assert!(self.stream.starts_with(b"')?; Ok(Token::Declaration(s)) } fn parse_comment(&mut self) -> Result, Error> { self.stream.advance(4)?; // skip loop { - self.stream.jump_to(b'>')?; + let len = self.stream.len_to(b'>')?; + + // length should be at least 2 to prevent '-' chars overlap + // like this: '' + if len < 2 { + return Err(Error::InvalidSvgToken(self.stream.gen_error_pos())); + } + + self.stream.advance_raw(len); if self.stream.char_at(-1)? == b'-' && self.stream.char_at(-2)? == b'-' { break; } @@ -239,8 +257,8 @@ impl<'a> Tokenizer<'a> { } // save data between - let comment_end_pos = self.stream.pos() - 2; - let s = self.stream.slice_region_raw(comment_start_pos, comment_end_pos); + let end_pos = self.stream.pos() - 2; + let s = self.stream.slice_region_raw(start_pos, end_pos); self.stream.advance(1)?; Ok(Token::Comment(s)) @@ -275,7 +293,10 @@ impl<'a> Tokenizer<'a> { // if first occurred char is '[' - than DTD has content // if first occurred char is '>' - than DTD is empty - self.stream.advance(10)?; // ''); @@ -284,6 +305,10 @@ impl<'a> Tokenizer<'a> { None => return Err(self.stream.gen_end_of_stream_error()), } + if start == self.stream.pos() { + return Err(Error::InvalidSvgToken(self.stream.gen_error_pos())); + } + if self.stream.is_char_eq(b'>')? { // empty DOCTYPE let text = self.stream.slice_region_raw(start, self.stream.pos()); @@ -343,20 +368,36 @@ impl<'a> Tokenizer<'a> { } fn parse_element(&mut self) -> Result, Error> { + debug_assert!(self.stream.is_char_eq_raw(b'<')); self.stream.advance(1)?; // < let start_pos = self.stream.pos(); - while !self.stream.at_end() - && !self.stream.is_space_raw() - && !self.stream.is_char_eq_raw(b'/') - && !self.stream.is_char_eq_raw(b'>') { + // consume a tag name + while !self.stream.at_end() && self.stream.is_ident_raw() { self.stream.advance(1)?; } + // check that current char is a valid one: + // '' + if !self.stream.at_end() { + if !self.stream.is_space_raw() + && !self.stream.is_char_eq_raw(b'/') + && !self.stream.is_char_eq_raw(b'>') + { + return Err(Error::InvalidSvgToken(self.stream.gen_error_pos())); + } + } else { + // stream can't end here + return Err(Error::InvalidSvgToken(self.stream.gen_error_pos())); + } + // check that element has tag name if start_pos == self.stream.pos() { - return Err(Error::ElementWithoutTagName(self.stream.gen_error_pos())); + // return Err(Error::ElementWithoutTagName(self.stream.gen_error_pos())); + return Err(Error::InvalidSvgToken(self.stream.gen_error_pos())); } let tag_name = self.stream.slice_region_raw(start_pos, self.stream.pos()); @@ -377,13 +418,29 @@ impl<'a> Tokenizer<'a> { if self.stream.is_char_eq(b'>')? { self.stream.advance_raw(1); self.state = State::Unknown; - return Ok(Token::ElementEnd(ElementEnd::Open)); } - let key = self.stream.read_to_trimmed(b'=')?; + self.stream.skip_spaces(); + + let name = { + let start = self.stream.pos(); + // consume an attribute name + while !self.stream.at_end() && self.stream.is_ident_raw() { + self.stream.advance(1)?; + } + + let len = self.stream.pos() - start; + if len == 0 { + return Err(Error::InvalidSvgToken(self.stream.gen_error_pos())); + } + + self.stream.slice_region_raw(start, start + len) + }; + + self.stream.skip_spaces(); - self.stream.advance(1)?; // = + self.stream.consume_char(b'=')?; self.stream.skip_spaces(); if !(self.stream.is_char_eq(b'"')? || self.stream.is_char_eq(b'\'')?) { @@ -405,6 +462,6 @@ impl<'a> Tokenizer<'a> { self.stream.skip_spaces(); - Ok(Token::Attribute(key, substream)) + Ok(Token::Attribute(name, substream)) } } diff --git a/tests/stream.rs b/tests/stream.rs index 75b3264..833626b 100644 --- a/tests/stream.rs +++ b/tests/stream.rs @@ -57,6 +57,7 @@ test_number_err!(number_err_2, b"", Error::InvalidNumber(ErrorPos::new(1,1)) test_number_err!(number_err_3, b"-", Error::UnexpectedEndOfStream(ErrorPos::new(1,2))); test_number_err!(number_err_4, b"+", Error::UnexpectedEndOfStream(ErrorPos::new(1,2))); test_number_err!(number_err_5, b"-q", Error::InvalidNumber(ErrorPos::new(1,1))); +test_number_err!(number_err_6, b"1e1111", Error::InvalidNumber(ErrorPos::new(1,1))); // --- diff --git a/tests/svg.rs b/tests/svg.rs index 9644bea..4b39bf4 100644 --- a/tests/svg.rs +++ b/tests/svg.rs @@ -107,6 +107,13 @@ fn parse_comment_3() { assert_eq!(p.parse_next().unwrap(), svg::Token::EndOfStream); } +#[test] +fn parse_comment_4() { + let mut p = svg::Tokenizer::new(b""); + basic_assert_eq!(p, svg::Token::Comment(b"")); + assert_eq!(p.parse_next().unwrap(), svg::Token::EndOfStream); +} + #[test] fn parse_cdata_1() { let mut p = svg::Tokenizer::new(b""); @@ -312,6 +319,17 @@ fn parse_attributes_8() { assert_eq!(p.parse_next().unwrap(), svg::Token::EndOfStream); } +#[test] +fn parse_attributes_9() { + let mut p = svg::Tokenizer::new(b""); + basic_assert_eq!(p, svg::Token::ElementStart(b"svg")); + attr_assert_eq!(p, b"stroke-width", b"1.0"); + attr_assert_eq!(p, b"x1", b"1.0"); + attr_assert_eq!(p, b"xlink:href", b"#link"); + basic_assert_eq!(p, svg::Token::ElementEnd(svg::ElementEnd::Empty)); + assert_eq!(p.parse_next().unwrap(), svg::Token::EndOfStream); +} + #[test] fn parse_text_1() { let mut p = svg::Tokenizer::new(b"

text

"); @@ -399,7 +417,7 @@ fn stream_end_on_element_2() { #[test] fn stream_end_on_element_4() { let mut p = svg::Tokenizer::new(b"<"); - assert_eq!(p.parse_next().err().unwrap(), Error::ElementWithoutTagName(ErrorPos::new(1, 2))); + assert_eq!(p.parse_next().err().unwrap(), Error::InvalidSvgToken(ErrorPos::new(1, 2))); } #[test] @@ -407,27 +425,26 @@ fn stream_end_on_element_5() { let mut p = svg::Tokenizer::new(b"<"); basic_assert_eq!(p, svg::Token::ElementStart(b"svg")); basic_assert_eq!(p, svg::Token::ElementEnd(svg::ElementEnd::Open)); - assert_eq!(p.parse_next().err().unwrap(), Error::ElementWithoutTagName(ErrorPos::new(1, 7))); + assert_eq!(p.parse_next().err().unwrap(), Error::InvalidSvgToken(ErrorPos::new(1, 7))); } #[test] fn stream_end_on_element_6() { let mut p = svg::Tokenizer::new(b"< svg"); - assert_eq!(p.parse_next().err().unwrap(), Error::ElementWithoutTagName(ErrorPos::new(1, 2))); + assert_eq!(p.parse_next().err().unwrap(), Error::InvalidSvgToken(ErrorPos::new(1, 2))); } #[test] fn stream_end_on_element_7() { let mut p = svg::Tokenizer::new(b""); + assert_eq!(p.parse_next().err().unwrap(), Error::InvalidSvgToken(ErrorPos::new(2, 2))); +} + +#[test] +fn invalid_structure_4() { + let mut p = svg::Tokenizer::new(b""); + assert_eq!(p.parse_next().err().unwrap(), Error::InvalidSvgToken(ErrorPos::new(1, 5))); +}