Skip to content
This repository has been archived by the owner on Jun 9, 2018. It is now read-only.

Commit

Permalink
Fixed few panics that can be triggered by an invalid input.
Browse files Browse the repository at this point in the history
Added 'Stream::is_letter_raw'.
Maximum numeric exponent is 100 now.
Removed 'Error::ElementWithoutTagName'
Removed 'Stream::read_to_trimmed'.
  • Loading branch information
RazrFalcon committed Feb 21, 2017
1 parent 1dbceba commit 4742f16
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 66 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased]
### Added
- `Error::Utf8Error` instead of panic during `str::from_utf8` unwrap.
- `Stream::is_letter_raw`.

### Changed
- Maximum numeric exponent is 100 now. It's needed to prevent multiply overflow.

### Removed
- The `u8_to_str` macro.
- `Error::ElementWithoutTagName`. `Error::InvalidSvgToken` will be emitted instead.
- `Stream::read_to_trimmed`.

### Fixed
- Few panics that can be triggered by an invalid input.

## [0.2.1] - 2017-02-01
### Fixed
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ See the documentation for details.

### Usage

Dependency: Rust >= 1.13

Add this to your `Cargo.toml`:

```toml
Expand Down
4 changes: 4 additions & 0 deletions examples/extract_paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ extern crate svgparser;

use std::env;
use std::fs;
use std::str;
use std::io::Read;

use svgparser::svg;
Expand All @@ -20,6 +21,9 @@ fn main() {
let mut v = Vec::new();
file.read_to_end(&mut v).unwrap();

// Check that input file is a valid UTF-8 text.
str::from_utf8(&v).unwrap();

// Begin parsing.
let mut p = svg::Tokenizer::new(&v);
// Get next token.
Expand Down
4 changes: 0 additions & 4 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ pub enum Error {
/// Absolute stream position.
pos: ErrorPos,
},
/// An SVG element must contain a tag name.
ElementWithoutTagName(ErrorPos),
/// UTF-8 processing error.
Utf8Error(str::Utf8Error),
}
Expand Down Expand Up @@ -102,8 +100,6 @@ impl fmt::Display for Error {
Error::InvalidAdvance{ ref expected, ref total, ref pos } =>
write!(f, "Attempt to advance to the pos {} from {}, but total len is {}",
expected, pos, total),
Error::ElementWithoutTagName(ref pos) =>
write!(f, "An element without a tag name at {}", pos),
Error::Utf8Error(e) =>
write!(f, "{}", e),
}
Expand Down
3 changes: 1 addition & 2 deletions src/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
/// List of all SVG length units.
#[derive(Clone,Copy,Debug,PartialEq)]
#[allow(missing_docs)]
pub enum LengthUnit
{
pub enum LengthUnit {
None,
Em,
Ex,
Expand Down
75 changes: 39 additions & 36 deletions src/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ fn is_space(c: u8) -> bool {
}
}

#[inline]
fn is_letter(c: u8) -> bool {
match c {
b'A'...b'Z' | b'a'...b'z' => true,
_ => false,
}
}

// Table giving binary powers of 10.
// Entry is 10^2^i. Used to convert decimal
// exponents into floating-point numbers.
Expand Down Expand Up @@ -309,7 +317,7 @@ impl<'a> Stream<'a> {
self.pos += n;
}

/// Checks that char at the current position is (white)space.
/// Checks that the current char is (white)space.
///
/// Accepted chars: ' ', '\n', '\r', '\t'.
///
Expand Down Expand Up @@ -375,6 +383,27 @@ impl<'a> Stream<'a> {
}
}

/// Checks that the current char is a letter.
#[inline]
pub fn is_letter_raw(&self) -> bool {
is_letter(self.curr_char_raw())
}

/// Checks that the current char is a valid part of an ident token.
#[inline]
pub fn is_ident_raw(&self) -> bool {
let c = self.curr_char_raw();
match c {
b'0'...b'9'
| b'A'...b'Z'
| b'a'...b'z'
| b'-'
| b'_'
| b':' => true,
_ => false,
}
}

#[inline]
fn get_char_raw(&self, pos: usize) -> u8 {
// TODO: maybe via unsafe to avoid bound checking
Expand Down Expand Up @@ -565,40 +594,6 @@ impl<'a> Stream<'a> {
Ok(s)
}

/// Returns reference to the trimmed data until selected char and
/// advance stream by the data length.
///
/// Same as `read_to()`, but also trim spaces from the both sides.
///
/// # Errors
///
/// Returns `Error::UnexpectedEndOfStream` if no such char.
///
/// # Examples
///
/// ```
/// use svgparser::Stream;
///
/// let mut s = Stream::new(b" Some text .");
/// assert_eq!(s.read_to_trimmed(b'.').unwrap(), b"Some text");
///
/// let mut s = Stream::new(b"Some text.");
/// assert_eq!(s.read_to_trimmed(b'.').unwrap(), b"Some text");
/// ```
#[inline]
pub fn read_to_trimmed(&mut self, c: u8) -> Result<&'a [u8], Error> {
self.skip_spaces();

let mut s = self.read_to(c)?;

// trim spaces at the end of the string
if let Some(p) = s.iter().rposition(|c| !is_space(*c)) {
s = &s[0..(p + 1)];
}

Ok(s)
}

/// Returns next data of stream with selected length.
///
/// # Examples
Expand Down Expand Up @@ -873,10 +868,18 @@ impl<'a> Stream<'a> {
self.advance(1)?;
}

while !self.at_end() && is_digit(self.curr_char()?) {
while !self.at_end() && is_digit(self.curr_char()?) && exp < 100 {
// TODO: use checked multiply
exp = exp * 10 + (self.curr_char_raw() - b'0') as i32;
self.advance_raw(1);
}

// check that 'exp' is less than 100 to prevent multiply overflow
if exp >= 100 {
// back to start
self.pos = start;
return Err(Error::InvalidNumber(self.gen_error_pos()));
}
}
}

Expand Down
93 changes: 75 additions & 18 deletions src/svg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,30 +217,48 @@ impl<'a> Tokenizer<'a> {
}

fn parse_declaration(&mut self) -> Result<Token<'a>, Error> {
let l = self.stream.len_to(b'>')?;
self.stream.advance(6)?; // '<?xml '
let s = self.stream.read_raw(l - 7);
self.stream.advance(2)?; // '?>'
debug_assert!(self.stream.starts_with(b"<?"));

// we are parsing the Declaration, not the Processing Instruction
if !self.stream.starts_with(b"<?xml ") {
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}
self.stream.advance_raw(6); // '<?xml '

// TODO: ? can be inside the string
let l = self.stream.len_to(b'?')?;
let s = self.stream.read_raw(l);

self.stream.consume_char(b'?')?;
self.stream.consume_char(b'>')?;

Ok(Token::Declaration(s))
}

fn parse_comment(&mut self) -> Result<Token<'a>, Error> {
self.stream.advance(4)?; // skip <!--
let comment_start_pos = self.stream.pos();
let start_pos = self.stream.pos();

// read all until -->
loop {
self.stream.jump_to(b'>')?;
let len = self.stream.len_to(b'>')?;

// length should be at least 2 to prevent '-' chars overlap
// like this: '<!-->'
if len < 2 {
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}

self.stream.advance_raw(len);
if self.stream.char_at(-1)? == b'-' && self.stream.char_at(-2)? == b'-' {
break;
}
self.stream.advance(1)?;
}

// save data between <!-- and -->
let comment_end_pos = self.stream.pos() - 2;
let s = self.stream.slice_region_raw(comment_start_pos, comment_end_pos);
let end_pos = self.stream.pos() - 2;
let s = self.stream.slice_region_raw(start_pos, end_pos);
self.stream.advance(1)?;

Ok(Token::Comment(s))
Expand Down Expand Up @@ -275,7 +293,10 @@ impl<'a> Tokenizer<'a> {
// if first occurred char is '[' - than DTD has content
// if first occurred char is '>' - than DTD is empty

self.stream.advance(10)?; // '<!DOCTYPE '
debug_assert!(self.stream.starts_with(b"<!DOCTYPE"));

self.stream.advance_raw(9); // '<!DOCTYPE'
self.stream.consume_char(b' ')?;
let start = self.stream.pos();

let l = self.stream.slice_tail().into_iter().position(|x| *x == b'[' || *x == b'>');
Expand All @@ -284,6 +305,10 @@ impl<'a> Tokenizer<'a> {
None => return Err(self.stream.gen_end_of_stream_error()),
}

if start == self.stream.pos() {
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}

if self.stream.is_char_eq(b'>')? {
// empty DOCTYPE
let text = self.stream.slice_region_raw(start, self.stream.pos());
Expand Down Expand Up @@ -343,20 +368,36 @@ impl<'a> Tokenizer<'a> {
}

fn parse_element(&mut self) -> Result<Token<'a>, Error> {
debug_assert!(self.stream.is_char_eq_raw(b'<'));
self.stream.advance(1)?; // <

let start_pos = self.stream.pos();

while !self.stream.at_end()
&& !self.stream.is_space_raw()
&& !self.stream.is_char_eq_raw(b'/')
&& !self.stream.is_char_eq_raw(b'>') {
// consume a tag name
while !self.stream.at_end() && self.stream.is_ident_raw() {
self.stream.advance(1)?;
}

// check that current char is a valid one:
// '<tagname '
// '<tagname/'
// '<tagname>'
if !self.stream.at_end() {
if !self.stream.is_space_raw()
&& !self.stream.is_char_eq_raw(b'/')
&& !self.stream.is_char_eq_raw(b'>')
{
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}
} else {
// stream can't end here
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}

// check that element has tag name
if start_pos == self.stream.pos() {
return Err(Error::ElementWithoutTagName(self.stream.gen_error_pos()));
// return Err(Error::ElementWithoutTagName(self.stream.gen_error_pos()));
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}

let tag_name = self.stream.slice_region_raw(start_pos, self.stream.pos());
Expand All @@ -377,13 +418,29 @@ impl<'a> Tokenizer<'a> {
if self.stream.is_char_eq(b'>')? {
self.stream.advance_raw(1);
self.state = State::Unknown;

return Ok(Token::ElementEnd(ElementEnd::Open));
}

let key = self.stream.read_to_trimmed(b'=')?;
self.stream.skip_spaces();

let name = {
let start = self.stream.pos();
// consume an attribute name
while !self.stream.at_end() && self.stream.is_ident_raw() {
self.stream.advance(1)?;
}

let len = self.stream.pos() - start;
if len == 0 {
return Err(Error::InvalidSvgToken(self.stream.gen_error_pos()));
}

self.stream.slice_region_raw(start, start + len)
};

self.stream.skip_spaces();

self.stream.advance(1)?; // =
self.stream.consume_char(b'=')?;
self.stream.skip_spaces();

if !(self.stream.is_char_eq(b'"')? || self.stream.is_char_eq(b'\'')?) {
Expand All @@ -405,6 +462,6 @@ impl<'a> Tokenizer<'a> {

self.stream.skip_spaces();

Ok(Token::Attribute(key, substream))
Ok(Token::Attribute(name, substream))
}
}
1 change: 1 addition & 0 deletions tests/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ test_number_err!(number_err_2, b"", Error::InvalidNumber(ErrorPos::new(1,1))
test_number_err!(number_err_3, b"-", Error::UnexpectedEndOfStream(ErrorPos::new(1,2)));
test_number_err!(number_err_4, b"+", Error::UnexpectedEndOfStream(ErrorPos::new(1,2)));
test_number_err!(number_err_5, b"-q", Error::InvalidNumber(ErrorPos::new(1,1)));
test_number_err!(number_err_6, b"1e1111", Error::InvalidNumber(ErrorPos::new(1,1)));

// ---

Expand Down
Loading

0 comments on commit 4742f16

Please sign in to comment.