Skip to content

Commit

Permalink
RUST-648 Reintroduce Document decoding w/ lossy UTF-8 conversion (cro…
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickfreed authored Feb 12, 2021
1 parent d8d8ebe commit d6e269e
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
20 changes: 17 additions & 3 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,7 @@ impl Document {
Ok(())
}

/// Attempts to deserialize a `Document` from a byte stream.
pub fn from_reader<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Document> {
fn decode<R: Read + ?Sized>(reader: &mut R, utf_lossy: bool) -> crate::de::Result<Document> {
let mut doc = Document::new();

let length = read_i32(reader)?;
Expand All @@ -550,7 +549,7 @@ impl Document {
break;
}

let (key, val) = deserialize_bson_kvp(cursor, tag, false)?;
let (key, val) = deserialize_bson_kvp(cursor, tag, utf_lossy)?;
doc.insert(key, val);
}
Ok(())
Expand All @@ -559,6 +558,21 @@ impl Document {

Ok(doc)
}

/// Reads one BSON-encoded `Document` out of `reader`.
///
/// This is the strict entry point: it delegates to the shared decoding routine with lossy
/// UTF-8 conversion switched off. Use `Document::from_reader_utf8_lossy` when the input may
/// carry invalid UTF-8 strings.
///
/// # Errors
///
/// Any error produced by the underlying decoder is returned unchanged.
pub fn from_reader<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Document> {
    let utf_lossy = false;
    Self::decode(reader, utf_lossy)
}

/// Attempts to deserialize a `Document` from a byte stream, converting strings lossily so
/// that input containing invalid UTF-8 can still be read.
///
/// This is mainly useful when reading raw BSON returned from a MongoDB server, which in
/// rare cases can contain invalidly truncated strings
/// (<https://jira.mongodb.org/browse/SERVER-24007>). For most use cases,
/// `Document::from_reader` can be used instead.
///
/// # Errors
///
/// Any error produced by the underlying decoder is returned unchanged.
pub fn from_reader_utf8_lossy<R: Read + ?Sized>(reader: &mut R) -> crate::de::Result<Document> {
    let utf_lossy = true;
    Self::decode(reader, utf_lossy)
}
}

pub struct Entry<'a> {
Expand Down
15 changes: 15 additions & 0 deletions src/tests/modules/serializer_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,21 @@ fn test_serialize_deserialize_utf8_string() {
assert_eq!(deserialized, doc);
}

/// Checks that lossy decoding turns invalid UTF-8 bytes inside a BSON string into U+FFFD
/// replacement characters.
///
/// The input is assembled byte-by-byte rather than by serializing a `String` holding invalid
/// UTF-8: creating such a `String` (e.g. via `String::from_utf8_unchecked`) violates the
/// type's safety contract and is undefined behaviour, even inside a test.
#[test]
fn test_encode_decode_utf8_string_invalid() {
    // Raw BSON for `{ "key": <0x80 0xAE> }`, laid out per the BSON specification.
    let buf: Vec<u8> = vec![
        0x11, 0x00, 0x00, 0x00, // total document size: 17 bytes, little-endian i32
        0x02, // element type: string
        b'k', b'e', b'y', 0x00, // element name, NUL-terminated
        0x03, 0x00, 0x00, 0x00, // string size: 2 payload bytes + trailing NUL
        0x80, 0xae, // payload: two bytes that are not valid UTF-8
        0x00, // string terminator
        0x00, // document terminator
    ];

    // Each invalid byte is expected to come back as one U+FFFD; escapes keep the literal
    // unambiguous regardless of how the source file is rendered.
    let expected = doc! { "key": "\u{FFFD}\u{FFFD}" };
    let decoded = Document::from_reader_utf8_lossy(&mut Cursor::new(buf)).unwrap();
    assert_eq!(decoded, expected);
}

#[test]
fn test_serialize_deserialize_array() {
let _guard = LOCK.run_concurrently();
Expand Down

0 comments on commit d6e269e

Please sign in to comment.