From 07000dc244cbd4fc1f9850618edd2952d65a38bd Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Wed, 25 Oct 2023 21:39:55 +0530 Subject: [PATCH 1/3] Improve UEFI stdio - Do not drop any character while reading - ebadf == Unsupported status - loop until read character or error encountered Signed-off-by: Ayush Singh --- library/std/src/sys/pal/uefi/stdio.rs | 56 +++++++++++++++++---------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/library/std/src/sys/pal/uefi/stdio.rs b/library/std/src/sys/pal/uefi/stdio.rs index a533d8a057506..7f384bca9c02f 100644 --- a/library/std/src/sys/pal/uefi/stdio.rs +++ b/library/std/src/sys/pal/uefi/stdio.rs @@ -6,43 +6,49 @@ use crate::ptr::NonNull; const MAX_BUFFER_SIZE: usize = 8192; -pub struct Stdin; +pub struct Stdin { + pending: Option, +} + pub struct Stdout; pub struct Stderr; impl Stdin { pub const fn new() -> Stdin { - Stdin + Stdin { pending: None } } } impl io::Read for Stdin { - fn read(&mut self, buf: &mut [u8]) -> io::Result { + fn read(&mut self, mut buf: &mut [u8]) -> io::Result { let st: NonNull = uefi::env::system_table().cast(); let stdin = unsafe { (*st.as_ptr()).con_in }; - // Try reading any pending data - let inp = match read_key_stroke(stdin) { - Ok(x) => x, - Err(e) if e == r_efi::efi::Status::NOT_READY => { - // Wait for keypress for new data - wait_stdin(stdin)?; - read_key_stroke(stdin).map_err(|x| io::Error::from_raw_os_error(x.as_usize()))? 
+ // Write any pending character + if let Some(ch) = self.pending { + if ch.len_utf8() > buf.len() { + return Ok(0); } - Err(e) => { - return Err(io::Error::from_raw_os_error(e.as_usize())); - } - }; + ch.encode_utf8(buf); + buf = &mut buf[ch.len_utf8()..]; + self.pending = None; + } + + // Try reading any pending data + let inp = read(stdin)?; // Check if the key is printiable character - if inp.scan_code != 0x00 { + if inp == 0x00 { return Err(io::const_io_error!(io::ErrorKind::Interrupted, "Special Key Press")); } - // SAFETY: Iterator will have only 1 character since we are reading only 1 Key - // SAFETY: This character will always be UCS-2 and thus no surrogates. - let ch: char = char::decode_utf16([inp.unicode_char]).next().unwrap().unwrap(); + // The option unwrap is safe since iterator will have 1 element. + let ch: char = char::decode_utf16([inp]) + .next() + .unwrap() + .map_err(|_| io::const_io_error!(io::ErrorKind::InvalidInput, "Invalid Input"))?; if ch.len_utf8() > buf.len() { + self.pending = Some(ch); return Ok(0); } @@ -93,8 +99,8 @@ impl io::Write for Stderr { // UCS-2 character should occupy 3 bytes at most in UTF-8 pub const STDIN_BUF_SIZE: usize = 3; -pub fn is_ebadf(_err: &io::Error) -> bool { - true +pub fn is_ebadf(err: &io::Error) -> bool { + err.raw_os_error() == Some(r_efi::efi::Status::UNSUPPORTED.as_usize()) } pub fn panic_output() -> Option { @@ -132,6 +138,16 @@ unsafe fn simple_text_output( if res.is_error() { Err(io::Error::from_raw_os_error(res.as_usize())) } else { Ok(()) } } +fn read(stdin: *mut r_efi::protocols::simple_text_input::Protocol) -> io::Result { + loop { + match read_key_stroke(stdin) { + Ok(x) => return Ok(x.unicode_char), + Err(e) if e == r_efi::efi::Status::NOT_READY => wait_stdin(stdin)?, + Err(e) => return Err(io::Error::from_raw_os_error(e.as_usize())), + } + } +} + fn wait_stdin(stdin: *mut r_efi::protocols::simple_text_input::Protocol) -> io::Result<()> { let boot_services: NonNull = 
uefi::env::boot_services().unwrap().cast(); From 346472c6be39db0236d606d4215d4f93d12451ef Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Wed, 25 Oct 2023 21:48:41 +0530 Subject: [PATCH 2/3] Use heap for stdout and stderr Signed-off-by: Ayush Singh --- library/std/src/sys/pal/uefi/stdio.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/library/std/src/sys/pal/uefi/stdio.rs b/library/std/src/sys/pal/uefi/stdio.rs index 7f384bca9c02f..9adccee9730bc 100644 --- a/library/std/src/sys/pal/uefi/stdio.rs +++ b/library/std/src/sys/pal/uefi/stdio.rs @@ -4,8 +4,6 @@ use crate::mem::MaybeUninit; use crate::os::uefi; use crate::ptr::NonNull; -const MAX_BUFFER_SIZE: usize = 8192; - pub struct Stdin { pending: Option, } @@ -111,19 +109,14 @@ fn write( protocol: *mut r_efi::protocols::simple_text_output::Protocol, buf: &[u8], ) -> io::Result { - let mut utf16 = [0; MAX_BUFFER_SIZE / 2]; - // Get valid UTF-8 buffer let utf8 = match crate::str::from_utf8(buf) { Ok(x) => x, Err(e) => unsafe { crate::str::from_utf8_unchecked(&buf[..e.valid_up_to()]) }, }; - // Clip UTF-8 buffer to max UTF-16 buffer we support - let utf8 = &utf8[..utf8.floor_char_boundary(utf16.len() - 1)]; - for (i, ch) in utf8.encode_utf16().enumerate() { - utf16[i] = ch; - } + let mut utf16: Vec = utf8.encode_utf16().collect(); + utf16.push(0); unsafe { simple_text_output(protocol, &mut utf16) }?; From 1fbb00bc3cb68736692935b68dbab3ba8bda4bc9 Mon Sep 17 00:00:00 2001 From: Ayush Singh Date: Mon, 30 Oct 2023 18:57:12 +0530 Subject: [PATCH 3/3] Fixes from PR - is_ebadf always returns false - Allow reading partial characters to buffer - Allow full UTF-16 in stdin Signed-off-by: Ayush Singh --- library/std/src/sys/pal/uefi/stdio.rs | 120 ++++++++++++++++++-------- 1 file changed, 84 insertions(+), 36 deletions(-) diff --git a/library/std/src/sys/pal/uefi/stdio.rs b/library/std/src/sys/pal/uefi/stdio.rs index 9adccee9730bc..703e8ba8e5710 100644 --- a/library/std/src/sys/pal/uefi/stdio.rs 
+++ b/library/std/src/sys/pal/uefi/stdio.rs @@ -5,7 +5,36 @@ use crate::os::uefi; use crate::ptr::NonNull; pub struct Stdin { - pending: Option, + surrogate: Option, + incomplete_utf8: IncompleteUtf8, +} + +struct IncompleteUtf8 { + bytes: [u8; 4], + len: u8, +} + +impl IncompleteUtf8 { + pub const fn new() -> IncompleteUtf8 { + IncompleteUtf8 { bytes: [0; 4], len: 0 } + } + + // Implemented for use in Stdin::read. + fn read(&mut self, buf: &mut [u8]) -> usize { + // Write to buffer until the buffer is full or we run out of bytes. + let to_write = crate::cmp::min(buf.len(), self.len as usize); + buf[..to_write].copy_from_slice(&self.bytes[..to_write]); + + // Rotate the remaining bytes if not enough remaining space in buffer. + if usize::from(self.len) > buf.len() { + self.bytes.copy_within(to_write.., 0); + self.len -= to_write as u8; + } else { + self.len = 0; + } + + to_write + } } pub struct Stdout; @@ -13,46 +42,62 @@ pub struct Stderr; impl Stdin { pub const fn new() -> Stdin { - Stdin { pending: None } + Stdin { surrogate: None, incomplete_utf8: IncompleteUtf8::new() } } } impl io::Read for Stdin { - fn read(&mut self, mut buf: &mut [u8]) -> io::Result { - let st: NonNull = uefi::env::system_table().cast(); - let stdin = unsafe { (*st.as_ptr()).con_in }; - - // Write any pending character - if let Some(ch) = self.pending { - if ch.len_utf8() > buf.len() { - return Ok(0); - } - ch.encode_utf8(buf); - buf = &mut buf[ch.len_utf8()..]; - self.pending = None; + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // If there are bytes in the incomplete utf-8, start with those. + // (No-op if there is nothing in the buffer.) 
+ let mut bytes_copied = self.incomplete_utf8.read(buf); + + let stdin: *mut r_efi::protocols::simple_text_input::Protocol = unsafe { + let st: NonNull = uefi::env::system_table().cast(); + (*st.as_ptr()).con_in + }; + + if bytes_copied == buf.len() { + return Ok(bytes_copied); } - // Try reading any pending data - let inp = read(stdin)?; - - // Check if the key is printiable character - if inp == 0x00 { - return Err(io::const_io_error!(io::ErrorKind::Interrupted, "Special Key Press")); + let ch = simple_text_input_read(stdin)?; + // Only 1 character should be returned. + let mut ch: Vec> = + if let Some(x) = self.surrogate.take() { + char::decode_utf16([x, ch]).collect() + } else { + char::decode_utf16([ch]).collect() + }; + + if ch.len() > 1 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid utf-16 sequence")); } - // The option unwrap is safe since iterator will have 1 element. - let ch: char = char::decode_utf16([inp]) - .next() - .unwrap() - .map_err(|_| io::const_io_error!(io::ErrorKind::InvalidInput, "Invalid Input"))?; - if ch.len_utf8() > buf.len() { - self.pending = Some(ch); - return Ok(0); + match ch.pop().unwrap() { + Err(e) => { + self.surrogate = Some(e.unpaired_surrogate()); + } + Ok(x) => { + // This will always be > 0 + let buf_free_count = buf.len() - bytes_copied; + assert!(buf_free_count > 0); + + if buf_free_count >= x.len_utf8() { + // There is enough space in the buffer for the character. + bytes_copied += x.encode_utf8(&mut buf[bytes_copied..]).len(); + } else { + // There is not enough space in the buffer for the character. + // Store the character in the incomplete buffer. + self.incomplete_utf8.len = + x.encode_utf8(&mut self.incomplete_utf8.bytes).len() as u8; + // write partial character to buffer. 
+ bytes_copied += self.incomplete_utf8.read(buf); + } + } } - ch.encode_utf8(buf); - - Ok(ch.len_utf8()) + Ok(bytes_copied) } } @@ -94,11 +139,11 @@ impl io::Write for Stderr { } } -// UCS-2 character should occupy 3 bytes at most in UTF-8 -pub const STDIN_BUF_SIZE: usize = 3; +// UTF-16 character should occupy 4 bytes at most in UTF-8 +pub const STDIN_BUF_SIZE: usize = 4; -pub fn is_ebadf(err: &io::Error) -> bool { - err.raw_os_error() == Some(r_efi::efi::Status::UNSUPPORTED.as_usize()) +pub fn is_ebadf(_err: &io::Error) -> bool { + false } pub fn panic_output() -> Option { @@ -116,6 +161,7 @@ fn write( }; let mut utf16: Vec = utf8.encode_utf16().collect(); + // NULL terminate the string utf16.push(0); unsafe { simple_text_output(protocol, &mut utf16) }?; @@ -131,7 +177,9 @@ unsafe fn simple_text_output( if res.is_error() { Err(io::Error::from_raw_os_error(res.as_usize())) } else { Ok(()) } } -fn read(stdin: *mut r_efi::protocols::simple_text_input::Protocol) -> io::Result { +fn simple_text_input_read( + stdin: *mut r_efi::protocols::simple_text_input::Protocol, +) -> io::Result { loop { match read_key_stroke(stdin) { Ok(x) => return Ok(x.unicode_char),