Skip to content

Commit

Permalink
also expose and use encode_utf16_raw for wtf8
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfJung committed May 30, 2020
1 parent 3182cdf commit 9c627c3
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 24 deletions.
59 changes: 37 additions & 22 deletions src/libcore/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,28 +701,7 @@ impl char {
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
#[inline]
pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
let mut code = self as u32;
// SAFETY: each arm checks whether there are enough bits to write into
unsafe {
if (code & 0xFFFF) == code && !dst.is_empty() {
// The BMP falls through (assuming non-surrogate, as it should)
*dst.get_unchecked_mut(0) = code as u16;
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
} else if dst.len() >= 2 {
// Supplementary planes break into surrogates.
code -= 0x1_0000;
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
} else {
panic!(
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf16(),
code,
dst.len(),
)
}
}
encode_utf16_raw(self as u32, dst)
}

/// Returns `true` if this `char` has the `Alphabetic` property.
Expand Down Expand Up @@ -1692,3 +1671,39 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut str {
// SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
}

/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
///
/// Unlike `char::encode_utf16`, this method can be called on codepoints in the surrogate range.
///
/// # Panics
///
/// Panics if the buffer is not large enough.
/// A buffer of length 2 is large enough to encode any `char`.
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
#[doc(hidden)]
#[inline]
pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
// SAFETY: each arm checks whether there are enough bits to write into
unsafe {
if (code & 0xFFFF) == code && !dst.is_empty() {
// The BMP falls through (assuming non-surrogate, as it should)
*dst.get_unchecked_mut(0) = code as u16;
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
} else if dst.len() >= 2 {
// Supplementary planes break into surrogates.
code -= 0x1_0000;
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
} else {
panic!(
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf16(),
code,
dst.len(),
)
}
}
}
2 changes: 2 additions & 0 deletions src/libcore/char/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub use crate::unicode::UNICODE_VERSION;

// perma-unstable re-exports
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub use self::methods::encode_utf16_raw;
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub use self::methods::encode_utf8_raw;

use crate::fmt::{self, Write};
Expand Down
3 changes: 1 addition & 2 deletions src/libstd/sys_common/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,7 @@ impl<'a> Iterator for EncodeWide<'a> {

let mut buf = [0; 2];
self.code_points.next().map(|code_point| {
let c = unsafe { char::from_u32_unchecked(code_point.value) };
let n = c.encode_utf16(&mut buf).len();
let n = char::encode_utf16_raw(code_point.value, &mut buf).len();
if n == 2 {
self.extra = buf[1];
}
Expand Down

0 comments on commit 9c627c3

Please sign in to comment.