diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index cb59a9d4ab2a6..df8a260624a28 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1170,6 +1170,17 @@ fn test_iterator() { assert_eq!(s.chars().count(), v.len()); } +#[test] +fn test_iterator_advance() { + let s = "「赤錆」と呼ばれる鉄錆は、水の存在下での鉄の自然酸化によって生じる、オキシ水酸化鉄(III) 等の(含水)酸化物粒子の疎な凝集膜であるとみなせる。"; + let chars: Vec = s.chars().collect(); + let mut it = s.chars(); + it.advance_by(1).unwrap(); + assert_eq!(it.next(), Some(chars[1])); + it.advance_by(33).unwrap(); + assert_eq!(it.next(), Some(chars[35])); +} + #[test] fn test_rev_iterator() { let s = "ศไทย中华Việt Nam"; diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index c30f01b3c06a4..d769587a435d5 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -8,6 +8,7 @@ use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::ops::Try; use crate::option; use crate::slice::{self, Split as SliceSplit}; +use core::num::NonZeroUsize; use super::from_utf8_unchecked; use super::pattern::Pattern; @@ -49,6 +50,48 @@ impl<'a> Iterator for Chars<'a> { super::count::count_chars(self.as_str()) } + #[inline] + fn advance_by(&mut self, mut remainder: usize) -> Result<(), NonZeroUsize> { + const CHUNK_SIZE: usize = 32; + + if remainder >= CHUNK_SIZE { + let mut chunks = self.iter.as_slice().array_chunks::(); + let mut bytes_skipped: usize = 0; + + while remainder > CHUNK_SIZE && let Some(chunk) = chunks.next() { + bytes_skipped += CHUNK_SIZE; + + let mut start_bytes = [false; CHUNK_SIZE]; + + for i in 0..CHUNK_SIZE { + start_bytes[i] = !super::validations::utf8_is_cont_byte(chunk[i]); + } + + remainder -= start_bytes.into_iter().map(|i| i as u8).sum::() as usize; + } + + unsafe { self.iter.advance_by(bytes_skipped).unwrap_unchecked() }; + + // skip trailing continuation bytes + while self.iter.len() > 0 { + let b = self.iter.as_slice()[0]; + if !super::validations::utf8_is_cont_byte(b) { + break + } + unsafe { self.iter.advance_by(1).unwrap_unchecked() }; + } + } + + while (remainder > 0) && (self.iter.len() > 0) { + remainder -= 1; + let b = self.iter.as_slice()[0]; + let slurp = super::validations::utf8_char_width(b); + unsafe { self.iter.advance_by(slurp).unwrap_unchecked() }; + } + + NonZeroUsize::new(remainder).map_or(Ok(()), Err) + } + #[inline] fn size_hint(&self) -> (usize, Option) { let len = self.iter.len();