Skip to content

Commit

Permalink
Add lengthiness
Browse files Browse the repository at this point in the history
  • Loading branch information
Manishearth committed Sep 25, 2024
1 parent f972835 commit 3eca922
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 55 deletions.
4 changes: 2 additions & 2 deletions utils/zerovec/src/hashmap/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ mod test {

const BINCODE_BYTES: &[u8] = &[
24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0,
3, 0, 0, 0, 0, 0, 1, 0, 2, 0, 98, 99, 97,
0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0,
3, 0, 0, 0, 1, 0, 2, 0, 98, 99, 97,
];

#[derive(Serialize, Deserialize)]
Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/src/map/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ mod test {

const JSON_STR: &str = "{\"1\":\"uno\",\"2\":\"dos\",\"3\":\"tres\"}";
const BINCODE_BYTES: &[u8] = &[
12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 3, 0,
0, 0, 0, 0, 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115,
12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 3, 0,
0, 0, 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115,
];

fn make_map() -> ZeroMap<'static, u32, str> {
Expand Down
6 changes: 3 additions & 3 deletions utils/zerovec/src/map2d/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,8 @@ mod test {
const JSON_STR: &str = "{\"1\":{\"1\":\"uno\"},\"2\":{\"2\":\"dos\",\"3\":\"tres\"}}";
const BINCODE_BYTES: &[u8] = &[
8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0,
0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 20, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115,
0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 18, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0,
6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115,
];

fn make_map() -> ZeroMap2d<'static, u32, u16, str> {
Expand Down Expand Up @@ -431,7 +431,7 @@ mod test {
bincode_bytes.as_slice(),
&[
2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0,
0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116, 104, 114, 101, 101
0, 0, 2, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 116, 104, 114, 101, 101
]
);
}
Expand Down
53 changes: 34 additions & 19 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ use core::mem;
use core::ops::Range;

// Also used by owned.rs
pub(super) const LENGTH_WIDTH: usize = 4;
pub(super) const METADATA_WIDTH: usize = 0;
pub(super) const MAX_LENGTH: usize = u32::MAX as usize;
pub(super) const MAX_INDEX: usize = u32::MAX as usize;

/// This trait allows switching between different possible internal
Expand All @@ -31,8 +29,15 @@ pub(super) const MAX_INDEX: usize = u32::MAX as usize;
/// and all of its associated items are hidden from the docs.
pub trait VarZeroVecFormat: 'static + Sized {
/// The type to use for the indexing array
///
/// Each entry of the index array occupies `Self::Index::SIZE` bytes.
///
/// Safety: must be a ULE for which all byte sequences are allowed
#[doc(hidden)]
type Index: IntegerULE;
/// The type to use for the length segment
///
/// The length segment is the first `Self::Len::SIZE` bytes of a non-empty
/// buffer and stores the number of elements in the vector; the largest
/// representable element count is `Self::Len::MAX_VALUE`.
///
/// Safety: must be a ULE for which all byte sequences are allowed
#[doc(hidden)]
type Len: IntegerULE;
}

/// This trait represents various ULE types that can be used to represent an integer
Expand Down Expand Up @@ -93,14 +98,17 @@ pub struct Index32;

// `Index8` format: both the index entries and the length field are a single `u8`.
impl VarZeroVecFormat for Index8 {
type Index = u8;
type Len = u8;
}

// `Index16` format: both the index entries and the length field are stored as
// `RawBytesULE<2>`.
impl VarZeroVecFormat for Index16 {
type Index = RawBytesULE<2>;
type Len = RawBytesULE<2>;
}

// `Index32` format: both the index entries and the length field are stored as
// `RawBytesULE<4>`.
impl VarZeroVecFormat for Index32 {
type Index = RawBytesULE<4>;
type Len = RawBytesULE<4>;
}

unsafe impl IntegerULE for u8 {
Expand Down Expand Up @@ -228,21 +236,22 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
});
}
let len_bytes = slice
.get(0..LENGTH_WIDTH)
.get(0..F::Len::SIZE)
.ok_or(VarZeroVecFormatError::Metadata)?;
let len_ule = RawBytesULE::<LENGTH_WIDTH>::parse_byte_slice(len_bytes)
.map_err(|_| VarZeroVecFormatError::Metadata)?;
let len_ule =
F::Len::parse_byte_slice(len_bytes).map_err(|_| VarZeroVecFormatError::Metadata)?;

let len = len_ule
.first()
.ok_or(VarZeroVecFormatError::Metadata)?
.as_unsigned_int();
.iule_to_usize();

let rest = slice
.get(LENGTH_WIDTH..)
.get(F::Len::SIZE..)
.ok_or(VarZeroVecFormatError::Metadata)?;
let len_u32 = u32::try_from(len).map_err(|_| VarZeroVecFormatError::Metadata);
// We pass down the rest of the invariants
Self::parse_byte_slice_with_length(len, rest)
Self::parse_byte_slice_with_length(len_u32?, rest)
}

/// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
Expand Down Expand Up @@ -303,16 +312,18 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
marker: PhantomData,
};
}
let len_bytes = slice.get_unchecked(0..LENGTH_WIDTH);
let len_ule = RawBytesULE::<LENGTH_WIDTH>::from_byte_slice_unchecked(len_bytes);
let len_bytes = slice.get_unchecked(0..F::Len::SIZE);
// Safety: F::Len allows all byte sequences
let len_ule = F::Len::from_byte_slice_unchecked(len_bytes);

let len = len_ule.get_unchecked(0).as_unsigned_int();
let len = len_ule.get_unchecked(0).iule_to_usize();
let len_u32 = len as u32;

// Safety: This method requires the bytes to have passed through `parse_byte_slice()`
// whereas we're calling something that asks for `parse_byte_slice_with_length()`.
// The two methods perform similar validation, with parse_byte_slice() validating an additional
// `F::Len::SIZE`-byte `length` header.
Self::from_bytes_unchecked_with_length(len, slice.get_unchecked(LENGTH_WIDTH..))
Self::from_bytes_unchecked_with_length(len_u32, slice.get_unchecked(F::Len::SIZE..))
}

/// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
Expand Down Expand Up @@ -582,7 +593,10 @@ where
{
debug_assert!(!elements.is_empty());
let len = compute_serializable_len::<T, A, F>(elements)?;
debug_assert!(len >= LENGTH_WIDTH as u32);
debug_assert!(
len >= F::Len::SIZE as u32,
"Must have at least F::Len::SIZE bytes to hold the length of the vector"
);
let mut output: Vec<u8> = alloc::vec![0; len as usize];
write_serializable_bytes::<T, A, F>(elements, &mut output);
Some(output)
Expand All @@ -602,7 +616,7 @@ where
A: EncodeAsVarULE<T>,
F: VarZeroVecFormat,
{
assert!(elements.len() <= MAX_LENGTH);
assert!(elements.len() <= F::Len::MAX_VALUE as usize);
if elements.is_empty() {
return;
}
Expand Down Expand Up @@ -657,13 +671,14 @@ where
if elements.is_empty() {
return;
}
assert!(elements.len() <= MAX_LENGTH);
let num_elements_bytes = elements.len().to_le_bytes();
assert!(elements.len() <= F::Len::MAX_VALUE as usize);
#[allow(clippy::expect_used)] // This function is explicitly panicky
let num_elements_ule = F::Len::iule_from_usize(elements.len()).expect(F::Len::TOO_LARGE_ERROR);
#[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
output[0..LENGTH_WIDTH].copy_from_slice(&num_elements_bytes[0..LENGTH_WIDTH]);
output[0..F::Len::SIZE].copy_from_slice(ULE::as_byte_slice(&[num_elements_ule]));

#[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
write_serializable_bytes_without_length::<T, A, F>(elements, &mut output[LENGTH_WIDTH..]);
write_serializable_bytes_without_length::<T, A, F>(elements, &mut output[F::Len::SIZE..]);
}

pub fn compute_serializable_len_without_length<T, A, F>(elements: &[A]) -> Option<u32>
Expand Down Expand Up @@ -698,5 +713,5 @@ where
A: EncodeAsVarULE<T>,
F: VarZeroVecFormat,
{
compute_serializable_len_without_length::<T, A, F>(elements).map(|x| x + LENGTH_WIDTH as u32)
compute_serializable_len_without_length::<T, A, F>(elements).map(|x| x + F::Len::SIZE as u32)
}
41 changes: 19 additions & 22 deletions utils/zerovec/src/varzerovec/owned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ use core::ops::Range;
use core::{fmt, ptr, slice};

use super::components::IntegerULE;
use super::components::LENGTH_WIDTH;
use super::components::MAX_INDEX;
use super::components::MAX_LENGTH;
use super::components::METADATA_WIDTH;

/// A fully-owned [`VarZeroVec`]. This type has no lifetime but has the same
Expand Down Expand Up @@ -132,12 +130,12 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
unsafe fn element_position_unchecked(&self, idx: usize) -> usize {
let len = self.len();
let out = if idx == len {
self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - (F::Index::SIZE * len)
self.entire_slice.len() - F::Len::SIZE - METADATA_WIDTH - (F::Index::SIZE * len)
} else {
self.index_data(idx).iule_to_usize()
};
debug_assert!(
out + LENGTH_WIDTH + METADATA_WIDTH + len * F::Index::SIZE <= self.entire_slice.len()
out + F::Len::SIZE + METADATA_WIDTH + len * F::Index::SIZE <= self.entire_slice.len()
);
out
}
Expand All @@ -158,15 +156,16 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
/// ## Safety
/// No safe functions may be called until `self.as_encoded_bytes()` is well-formed.
unsafe fn set_len(&mut self, len: usize) {
assert!(len <= MAX_LENGTH);
assert!(len <= F::Len::MAX_VALUE as usize);
let len_bytes = len.to_le_bytes();
self.entire_slice[0..LENGTH_WIDTH].copy_from_slice(&len_bytes[0..LENGTH_WIDTH]);
let len_ule = F::Len::iule_from_usize(len).expect(F::Len::TOO_LARGE_ERROR);
self.entire_slice[0..F::Len::SIZE].copy_from_slice(ULE::as_byte_slice(&[len_ule]));
// Double-check that the length fits in the length field
assert_eq!(len_bytes[LENGTH_WIDTH..].iter().sum::<u8>(), 0);
assert_eq!(len_bytes[F::Len::SIZE..].iter().sum::<u8>(), 0);
}

fn index_range(index: usize) -> Range<usize> {
let pos = LENGTH_WIDTH + METADATA_WIDTH + F::Index::SIZE * index;
let pos = F::Len::SIZE + METADATA_WIDTH + F::Index::SIZE * index;
pos..pos + F::Index::SIZE
}

Expand Down Expand Up @@ -203,8 +202,8 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
unsafe fn shift_indices(&mut self, starting_index: usize, amount: i32) {
let len = self.len();
let indices = F::Index::iule_from_byte_slice_unchecked_mut(
&mut self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH
..LENGTH_WIDTH + METADATA_WIDTH + F::Index::SIZE * len],
&mut self.entire_slice[F::Len::SIZE + METADATA_WIDTH
..F::Len::SIZE + METADATA_WIDTH + F::Index::SIZE * len],
);
for idx in &mut indices[starting_index..] {
let mut new_idx = idx.iule_to_usize();
Expand Down Expand Up @@ -293,7 +292,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
let old_slice_end = slice_range.start.add(slice_len);
let data_start = slice_range
.start
.add(LENGTH_WIDTH + METADATA_WIDTH + len * F::Index::SIZE);
.add(F::Len::SIZE + METADATA_WIDTH + len * F::Index::SIZE);
let prev_element_p =
data_start.add(prev_element.start)..data_start.add(prev_element.end);

Expand All @@ -304,7 +303,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
let index_range = {
let index_start = slice_range
.start
.add(LENGTH_WIDTH + METADATA_WIDTH + F::Index::SIZE * index);
.add(F::Len::SIZE + METADATA_WIDTH + F::Index::SIZE * index);
index_start..index_start.add(F::Index::SIZE)
};

Expand Down Expand Up @@ -354,7 +353,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
debug_assert!(self.verify_integrity());

// Return a mut slice to the new element data.
let element_pos = LENGTH_WIDTH
let element_pos = F::Len::SIZE
+ METADATA_WIDTH
+ self.len() * F::Index::SIZE
+ self.element_position_unchecked(index);
Expand All @@ -377,21 +376,19 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
_ => (),
}
let len = unsafe {
RawBytesULE::<LENGTH_WIDTH>::from_byte_slice_unchecked(
&self.entire_slice[..LENGTH_WIDTH],
)[0]
.as_unsigned_int()
<F::Len as ULE>::from_byte_slice_unchecked(&self.entire_slice[..F::Len::SIZE])[0]
.iule_to_usize()
};
if len == 0 {
// An empty vec must have an empty slice: there is only a single valid byte representation.
return false;
}
if slice_len < LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::Index::SIZE {
if slice_len < F::Len::SIZE + METADATA_WIDTH + len * F::Index::SIZE {
// Not enough room for the indices.
return false;
}
let data_len =
self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - len as usize * F::Index::SIZE;
self.entire_slice.len() - F::Len::SIZE - METADATA_WIDTH - len * F::Index::SIZE;
if data_len > MAX_INDEX {
// The data segment is too long.
return false;
Expand All @@ -400,8 +397,8 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
// Test index validity.
let indices = unsafe {
F::Index::from_byte_slice_unchecked(
&self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH
..LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::Index::SIZE],
&self.entire_slice[F::Len::SIZE + METADATA_WIDTH
..F::Len::SIZE + METADATA_WIDTH + len * F::Index::SIZE],
)
};
for idx in indices {
Expand Down Expand Up @@ -434,7 +431,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
let value_len = element.encode_var_ule_len();

if len == 0 {
let header_len = LENGTH_WIDTH + METADATA_WIDTH + F::Index::SIZE;
let header_len = F::Len::SIZE + METADATA_WIDTH + F::Index::SIZE;
let cap = header_len + value_len;
self.entire_slice.resize(cap, 0);
self.entire_slice[0] = 1; // set length
Expand Down
14 changes: 7 additions & 7 deletions utils/zerovec/src/varzerovec/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,21 +190,21 @@ mod test {

// ["foo", "bar", "baz", "dolor", "quux", "lorem ipsum"];
const BYTES: &[u8] = &[
6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122,
100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115,
117, 109,
6, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122, 100,
111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115, 117,
109,
];
const JSON_STR: &str = "[\"foo\",\"bar\",\"baz\",\"dolor\",\"quux\",\"lorem ipsum\"]";
const BINCODE_BUF: &[u8] = &[
45, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111,
98, 97, 114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101,
109, 32, 105, 112, 115, 117, 109,
43, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97,
114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32,
105, 112, 115, 117, 109,
];

// ["w", "ω", "文", "𑄃"]
const NONASCII_STR: &[&str] = &["w", "ω", "文", "𑄃"];
const NONASCII_BYTES: &[u8] = &[
4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131,
4, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131,
];
#[test]
fn test_serde_json() {
Expand Down

0 comments on commit 3eca922

Please sign in to comment.