Skip to content

Commit

Permalink
Merge pull request #780 from Mingun/end-attributes
Browse files Browse the repository at this point in the history
Allow attributes in the `Event::End` and fix `.error_position()`
  • Loading branch information
Mingun committed Jul 8, 2024
2 parents 0960333 + 6a48a28 commit 959eb55
Show file tree
Hide file tree
Showing 18 changed files with 415 additions and 449 deletions.
6 changes: 6 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@

- [#781]: Fix conditions to start CDATA section. Only uppercase `<![CDATA[` can start it.
Previously any case was allowed.
- [#780]: Fixed incorrect `.error_position()` when encountering a syntax error in an opening or self-closing tag.

### Misc Changes

- [#780]: `reader::Parser`, `reader::ElementParser` and `reader::PiParser` moved to the new module `parser`.
- [#776]: Allow attributes in the end tag for compatibility with the Adobe Flash XML parser.

[#776]: https://github.com/tafia/quick-xml/issues/776
[#780]: https://github.com/tafia/quick-xml/pull/780
[#781]: https://github.com/tafia/quick-xml/pull/781


Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ loop {
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event()`
match reader.read_event_into(&mut buf) {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
// exits the loop when reaching end of file
Ok(Event::Eof) => break,

Expand Down Expand Up @@ -98,7 +98,7 @@ loop {
Ok(Event::Eof) => break,
// we can either move or borrow the event to write, depending on your use-case
Ok(e) => assert!(writer.write_event(e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
}
}

Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
);
}
Ok(Event::Eof) => break,
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
_ => (),
}
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_buffered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() -> Result<(), quick_xml::Error> {
count += 1;
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn main() {
println!("{:?}", txt);
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub mod errors;
pub mod escape;
pub mod events;
pub mod name;
pub mod parser;
pub mod reader;
#[cfg(feature = "serialize")]
pub mod se;
Expand Down
4 changes: 2 additions & 2 deletions src/reader/element.rs → src/parser/element.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains a parser for an XML element.

use crate::errors::SyntaxError;
use crate::reader::Parser;
use crate::parser::Parser;

/// A parser that searches for a `>` symbol in the slice outside of quoted regions.
///
Expand All @@ -25,7 +25,7 @@ use crate::reader::Parser;
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::reader::{ElementParser, Parser};
/// use quick_xml::parser::{ElementParser, Parser};
///
/// let mut parser = ElementParser::default();
///
Expand Down
29 changes: 29 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//! Contains low-level parsers of different XML pieces.

use crate::errors::SyntaxError;

mod element;
mod pi;

pub use element::ElementParser;
pub use pi::PiParser;

/// Decouples reading of data from a data source from parsing the XML structure in it.
/// An implementor is the state preserved between getting chunks of bytes from the reader.
///
/// This trait is implemented for every parser that processes a piece of XML grammar.
pub trait Parser {
/// Processes new data and tries to determine the end of the parsed thing.
///
/// Returns the position of the end of the thing in `bytes` if the search was
/// successful, and `None` otherwise.
///
/// # Parameters
/// - `bytes`: a slice in which to find the end of a thing.
///   Should contain text in an ASCII-compatible encoding
fn feed(&mut self, bytes: &[u8]) -> Option<usize>;

/// Returns the parse error produced by this parser when the end of input is
/// reached without finding the end of the parsed thing.
fn eof_error() -> SyntaxError;
}
4 changes: 2 additions & 2 deletions src/reader/pi.rs → src/parser/pi.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains a parser for an XML processing instruction.

use crate::errors::SyntaxError;
use crate::reader::Parser;
use crate::parser::Parser;

/// A parser that searches for a `?>` sequence in the slice.
///
Expand All @@ -19,7 +19,7 @@ use crate::reader::Parser;
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::reader::{Parser, PiParser};
/// use quick_xml::parser::{Parser, PiParser};
///
/// let mut parser = PiParser::default();
///
Expand Down
7 changes: 3 additions & 4 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ use tokio::io::{self, AsyncBufRead, AsyncBufReadExt};
use crate::errors::{Error, Result, SyntaxError};
use crate::events::Event;
use crate::name::{QName, ResolveResult};
use crate::parser::{ElementParser, Parser, PiParser};
use crate::reader::buffered_reader::impl_buffered_source;
use crate::reader::{
BangType, ElementParser, NsReader, ParseState, Parser, PiParser, ReadTextResult, Reader, Span,
};
use crate::reader::{BangType, NsReader, ParseState, ReadTextResult, Reader, Span};
use crate::utils::is_whitespace;

/// A struct for reading XML asynchronously from an [`AsyncBufRead`].
Expand Down Expand Up @@ -59,7 +58,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// match reader.read_event_into_async(&mut buf).await {
/// Ok(Event::Start(_)) => count += 1,
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
/// Ok(Event::Eof) => break,
/// _ => (),
/// }
Expand Down
53 changes: 3 additions & 50 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use std::path::Path;
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::reader::{BangType, Parser, ReadTextResult, Reader, Span, XmlSource};
use crate::parser::Parser;
use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
use crate::utils::is_whitespace;

macro_rules! impl_buffered_source {
Expand Down Expand Up @@ -100,54 +101,6 @@ macro_rules! impl_buffered_source {
ReadTextResult::UpToEof(&buf[start..])
}

/// Reads bytes from the underlying reader into `buf` until `byte` is found
/// or the reader returns an empty buffer.
///
/// The delimiter itself is consumed from the reader but is not appended to `buf`.
///
/// # Parameters
/// - `byte`: the delimiter to search for; must be ASCII (checked by a debug assertion)
/// - `buf`: destination buffer; data is appended after its current content
/// - `position`: advanced by the number of bytes consumed from the reader by
///   this call, also when an I/O error is returned
///
/// # Returns
/// The part of `buf` appended by this call and a flag which is `true` when the
/// delimiter was found and `false` when the input ended first.
///
/// # Errors
/// Returns any error from `fill_buf` except `io::ErrorKind::Interrupted`,
/// in which case the read is retried.
#[inline]
$($async)? fn read_bytes_until $(<$lf>)? (
&mut self,
byte: u8,
buf: &'b mut Vec<u8>,
position: &mut u64,
) -> io::Result<(&'b [u8], bool)> {
// search byte must be within the ascii range
debug_assert!(byte.is_ascii());

// Number of bytes consumed from the reader during this call
let mut read = 0;
// Everything in `buf` before `start` was there before this call
let start = buf.len();
loop {
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
// An empty buffer means there is no more input
Ok(n) if n.is_empty() => break,
Ok(n) => n,
// Interrupted reads are retried
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => {
// Account for the bytes already consumed before reporting the error
*position += read;
return Err(e);
}
};

match memchr::memchr(byte, available) {
Some(i) => {
// Copy everything before the delimiter, but not the delimiter itself
buf.extend_from_slice(&available[..i]);

// +1 to consume the delimiter as well
let used = i + 1;
self $(.$reader)? .consume(used);
read += used as u64;

*position += read;
return Ok((&buf[start..], true));
}
None => {
// Delimiter is not in this chunk: take it whole and fetch the next one
buf.extend_from_slice(available);

let used = available.len();
self $(.$reader)? .consume(used);
read += used as u64;
}
}
}

// Input ended before the delimiter was found
*position += read;
Ok((&buf[start..], false))
}

#[inline]
$($async)? fn read_with<$($lf,)? P: Parser>(
&mut self,
Expand Down Expand Up @@ -327,7 +280,7 @@ impl<R: BufRead> Reader<R> {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(_)) => count += 1,
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
/// Ok(Event::Eof) => break,
/// _ => (),
/// }
Expand Down
Loading

0 comments on commit 959eb55

Please sign in to comment.