From 06511573f236a07b8ecd7c3e3d5fef1d53e59352 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Mon, 11 Feb 2019 16:51:32 +0100 Subject: [PATCH 1/5] Remove sys::*::Stderr Write implementation --- src/libstd/io/mod.rs | 3 +++ src/libstd/io/stdio.rs | 8 ++++++-- src/libstd/sys/cloudabi/stdio.rs | 13 ------------- src/libstd/sys/redox/stdio.rs | 15 +-------------- src/libstd/sys/sgx/stdio.rs | 13 ------------- src/libstd/sys/unix/stdio.rs | 15 +-------------- src/libstd/sys/wasm/stdio.rs | 11 +---------- src/libstd/sys/windows/stdio.rs | 15 +-------------- 8 files changed, 13 insertions(+), 80 deletions(-) diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index c0570ae60a19c..4fd114884e367 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -286,6 +286,9 @@ pub use self::stdio::{_print, _eprint}; #[doc(no_inline, hidden)] pub use self::stdio::{set_panic, set_print}; +// Used inside the standard library for panic output. +pub(crate) use self::stdio::stderr_raw; + pub mod prelude; mod buffered; mod cursor; diff --git a/src/libstd/io/stdio.rs b/src/libstd/io/stdio.rs index 4068c0f9c7de5..4fc11db3ff52a 100644 --- a/src/libstd/io/stdio.rs +++ b/src/libstd/io/stdio.rs @@ -32,7 +32,9 @@ struct StdoutRaw(stdio::Stdout); /// /// This handle is not synchronized or buffered in any fashion. Constructed via /// the `std::io::stdio::stderr_raw` function. -struct StderrRaw(stdio::Stderr); +/// +/// Not exposed, but used inside the standard library for panic output. +pub(crate) struct StderrRaw(stdio::Stderr); /// Constructs a new raw handle to the standard input of this process. /// @@ -61,7 +63,9 @@ fn stdout_raw() -> io::Result { stdio::Stdout::new().map(StdoutRaw) } /// /// The returned handle has no external synchronization or buffering layered on /// top. -fn stderr_raw() -> io::Result { stdio::Stderr::new().map(StderrRaw) } +/// +/// Not exposed, but used inside the standard library for panic output. +pub(crate) fn stderr_raw() -> io::Result { stdio::Stderr::new().map(StderrRaw) } impl Read for StdinRaw { fn read(&mut self, buf: &mut [u8]) -> io::Result { self.0.read(buf) } diff --git a/src/libstd/sys/cloudabi/stdio.rs b/src/libstd/sys/cloudabi/stdio.rs index 2cd3477cd519d..8ec92326dbd8c 100644 --- a/src/libstd/sys/cloudabi/stdio.rs +++ b/src/libstd/sys/cloudabi/stdio.rs @@ -49,19 +49,6 @@ impl Stderr { } } -// FIXME: right now this raw stderr handle is used in a few places because -// std::io::stderr_raw isn't exposed, but once that's exposed this impl -// should go away -impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result { - Stderr::write(self, data) - } - - fn flush(&mut self) -> io::Result<()> { - Stderr::flush(self) - } -} - pub fn is_ebadf(err: &io::Error) -> bool { err.raw_os_error() == Some(abi::errno::BADF as i32) } diff --git a/src/libstd/sys/redox/stdio.rs b/src/libstd/sys/redox/stdio.rs index e814b0942c14d..66f84e1752752 100644 --- a/src/libstd/sys/redox/stdio.rs +++ b/src/libstd/sys/redox/stdio.rs @@ -47,19 +47,6 @@ impl Stderr { } } -// FIXME: right now this raw stderr handle is used in a few places because -// std::io::stderr_raw isn't exposed, but once that's exposed this impl -// should go away -impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result { - Stderr::write(self, data) - } - - fn flush(&mut self) -> io::Result<()> { - Stderr::flush(self) - } -} - pub fn is_ebadf(err: &io::Error) -> bool { err.raw_os_error() == Some(::sys::syscall::EBADF as i32) } @@ -67,5 +54,5 @@ pub fn is_ebadf(err: &io::Error) -> bool { pub const STDIN_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE; pub fn panic_output() -> Option { - Stderr::new().ok() + io::stderr_raw().ok() } diff --git a/src/libstd/sys/sgx/stdio.rs b/src/libstd/sys/sgx/stdio.rs index 6f206cd9a51d7..c8efb931d6fe5 100644 --- a/src/libstd/sys/sgx/stdio.rs +++ b/src/libstd/sys/sgx/stdio.rs @@ -46,19 +46,6 @@ impl Stderr { } } -// FIXME: right now this raw stderr handle is used in a few places because -// std::io::stderr_raw isn't exposed, but once that's exposed this impl -// should go away -impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result { - Stderr::write(self, data) - } - - fn flush(&mut self) -> io::Result<()> { - Stderr::flush(self) - } -} - pub const STDIN_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE; pub fn is_ebadf(err: &io::Error) -> bool { diff --git a/src/libstd/sys/unix/stdio.rs b/src/libstd/sys/unix/stdio.rs index 715f2eafb2d9b..e23723222be35 100644 --- a/src/libstd/sys/unix/stdio.rs +++ b/src/libstd/sys/unix/stdio.rs @@ -47,19 +47,6 @@ impl Stderr { } } -// FIXME: right now this raw stderr handle is used in a few places because -// std::io::stderr_raw isn't exposed, but once that's exposed this impl -// should go away -impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result { - Stderr::write(self, data) - } - - fn flush(&mut self) -> io::Result<()> { - Stderr::flush(self) - } -} - pub fn is_ebadf(err: &io::Error) -> bool { err.raw_os_error() == Some(libc::EBADF as i32) } @@ -67,5 +54,5 @@ pub fn is_ebadf(err: &io::Error) -> bool { pub const STDIN_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE; pub fn panic_output() -> Option { - Stderr::new().ok() + io::stderr_raw().ok() } diff --git a/src/libstd/sys/wasm/stdio.rs b/src/libstd/sys/wasm/stdio.rs index 201619b2fb139..015d3f2066042 100644 --- a/src/libstd/sys/wasm/stdio.rs +++ b/src/libstd/sys/wasm/stdio.rs @@ -45,15 +45,6 @@ impl Stderr { } } -impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result { - (&*self).write(data) - } - fn flush(&mut self) -> io::Result<()> { - (&*self).flush() - } -} - pub const STDIN_BUF_SIZE: usize = 0; pub fn is_ebadf(_err: &io::Error) -> bool { @@ -62,7 +53,7 @@ pub fn is_ebadf(_err: &io::Error) -> bool { pub fn panic_output() -> Option { if cfg!(feature = "wasm_syscall") { - Stderr::new().ok() + io::stderr_raw().ok() } else { None } diff --git a/src/libstd/sys/windows/stdio.rs b/src/libstd/sys/windows/stdio.rs index 0ea19a855257b..45a12d075bad0 100644 --- a/src/libstd/sys/windows/stdio.rs +++ b/src/libstd/sys/windows/stdio.rs @@ -165,19 +165,6 @@ impl Stderr { } } -// FIXME: right now this raw stderr handle is used in a few places because -// std::io::stderr_raw isn't exposed, but once that's exposed this impl -// should go away -impl io::Write for Stderr { - fn write(&mut self, data: &[u8]) -> io::Result { - Stderr::write(self, data) - } - - fn flush(&mut self) -> io::Result<()> { - Stderr::flush(self) - } -} - impl Output { pub fn handle(&self) -> c::HANDLE { match *self { @@ -216,5 +203,5 @@ pub fn is_ebadf(err: &io::Error) -> bool { pub const STDIN_BUF_SIZE: usize = 8 * 1024; pub fn panic_output() -> Option { - Stderr::new().ok() + io::stderr_raw().ok() } From cc20ed678e4fc782cbbe55501bb3e300b5430c37 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Thu, 14 Feb 2019 06:29:10 +0100 Subject: [PATCH 2/5] Remove unused Read implementation on sys::Windows::Stdin --- src/libstd/sys/windows/stdio.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/libstd/sys/windows/stdio.rs b/src/libstd/sys/windows/stdio.rs index 45a12d075bad0..617124c36b30a 100644 --- a/src/libstd/sys/windows/stdio.rs +++ b/src/libstd/sys/windows/stdio.rs @@ -1,7 +1,5 @@ #![unstable(issue = "0", feature = "windows_stdio")] -use io::prelude::*; - use cmp; use io::{self, Cursor}; use ptr; @@ -130,13 +128,6 @@ impl Stdin { } } -#[unstable(reason = "not public", issue = "0", feature = "fd_read")] -impl<'a> Read for &'a Stdin { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - (**self).read(buf) - } -} - impl Stdout { pub fn new() -> io::Result { Ok(Stdout) From f411852add58637a3b8628a8f70146106bdc9719 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Thu, 14 Feb 2019 06:35:20 +0100 Subject: [PATCH 3/5] Refactor Windows stdio and remove stdin double buffering --- src/libstd/sys/windows/process.rs | 4 +- src/libstd/sys/windows/stdio.rs | 290 ++++++++++++++++++------------ 2 files changed, 178 insertions(+), 116 deletions(-) diff --git a/src/libstd/sys/windows/process.rs b/src/libstd/sys/windows/process.rs index 08a166bd8c504..2527168a968c4 100644 --- a/src/libstd/sys/windows/process.rs +++ b/src/libstd/sys/windows/process.rs @@ -252,9 +252,9 @@ impl Stdio { // should still be unavailable so propagate the // INVALID_HANDLE_VALUE. Stdio::Inherit => { - match stdio::get(stdio_id) { + match stdio::get_handle(stdio_id) { Ok(io) => { - let io = Handle::new(io.handle()); + let io = Handle::new(io); let ret = io.duplicate(0, true, c::DUPLICATE_SAME_ACCESS); io.into_raw(); diff --git a/src/libstd/sys/windows/stdio.rs b/src/libstd/sys/windows/stdio.rs index 617124c36b30a..ccfe0ced82fbc 100644 --- a/src/libstd/sys/windows/stdio.rs +++ b/src/libstd/sys/windows/stdio.rs @@ -1,131 +1,226 @@ #![unstable(issue = "0", feature = "windows_stdio")] +use cell::Cell; use cmp; -use io::{self, Cursor}; +use io; use ptr; use str; -use sync::Mutex; use sys::c; use sys::cvt; use sys::handle::Handle; -pub enum Output { - Console(c::HANDLE), - Pipe(c::HANDLE), -} - +// Don't cache handles but get them fresh for every read/write. This allows us to track changes to +// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490. pub struct Stdin { - utf8: Mutex>>, + high_surrogate: Cell, } pub struct Stdout; pub struct Stderr; -pub fn get(handle: c::DWORD) -> io::Result { - let handle = unsafe { c::GetStdHandle(handle) }; +// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see +// #13304 for details). +// +// From MSDN (2011): "The storage for this buffer is allocated from a shared heap for the +// process that is 64 KB in size. The maximum size of the buffer will depend on heap usage." +// +// We choose the cap at 8 KiB because libuv does the same, and it seems to be acceptable so far. +const MAX_BUFFER_SIZE: usize = 8192; + +// The standard buffer size of BufReader for Stdin should be able to hold 3x more bytes than there +// are `u16`'s in MAX_BUFFER_SIZE. This ensures the read data can always be completely decoded from +// UTF-16 to UTF-8. +pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3; + +pub fn get_handle(handle_id: c::DWORD) -> io::Result { + let handle = unsafe { c::GetStdHandle(handle_id) }; if handle == c::INVALID_HANDLE_VALUE { Err(io::Error::last_os_error()) } else if handle.is_null() { Err(io::Error::from_raw_os_error(c::ERROR_INVALID_HANDLE as i32)) } else { - let mut out = 0; - match unsafe { c::GetConsoleMode(handle, &mut out) } { - 0 => Ok(Output::Pipe(handle)), - _ => Ok(Output::Console(handle)), - } + Ok(handle) } } -fn write(handle: c::DWORD, data: &[u8]) -> io::Result { - let handle = match get(handle)? { - Output::Console(c) => c, - Output::Pipe(p) => { - let handle = Handle::new(p); - let ret = handle.write(data); - handle.into_raw(); - return ret - } - }; +fn is_console(handle: c::HANDLE) -> bool { + // `GetConsoleMode` will return false (0) if this is a pipe (we don't care about the reported + // mode). This will only detect Windows Console, not other terminals connected to a pipe like + // MSYS. Which is exactly what we need, as only Windows Console needs a conversion to UTF-16. + let mut mode = 0; + unsafe { c::GetConsoleMode(handle, &mut mode) != 0 } +} - // As with stdin on windows, stdout often can't handle writes of large - // sizes. For an example, see #14940. For this reason, don't try to - // write the entire output buffer on windows. - // - // For some other references, it appears that this problem has been - // encountered by others [1] [2]. We choose the number 8K just because - // libuv does the same. +fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result { + let handle = get_handle(handle_id)?; + if !is_console(handle) { + let handle = Handle::new(handle); + let ret = handle.write(data); + handle.into_raw(); // Don't close the handle + return ret; + } + + // As the console is meant for presenting text, we assume bytes of `data` come from a string + // and are encoded as UTF-8, which needs to be encoded as UTF-16. // - // [1]: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232 - // [2]: http://www.mail-archive.com/log4net-dev@logging.apache.org/msg00661.html - const OUT_MAX: usize = 8192; - let len = cmp::min(data.len(), OUT_MAX); + // If the data is not valid UTF-8 we write out as many bytes as are valid. + // Only when there are no valid bytes (which will happen on the next call), return an error. + let len = cmp::min(data.len(), MAX_BUFFER_SIZE); let utf8 = match str::from_utf8(&data[..len]) { Ok(s) => s, - Err(ref e) if e.valid_up_to() == 0 => return Err(invalid_encoding()), + Err(ref e) if e.valid_up_to() == 0 => { + return Err(io::Error::new(io::ErrorKind::InvalidData, + "Windows stdio in console mode does not support non-UTF-8 byte sequences; \ + see https://github.com/rust-lang/rust/issues/23344")) + }, Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(), }; let utf16 = utf8.encode_utf16().collect::>(); + + let mut written = write_u16s(handle, &utf16)?; + + // Figure out how many bytes of as UTF-8 were written away as UTF-16. + if written >= utf16.len() { + Ok(utf8.len()) + } else { + // Make sure we didn't end up writing only half of a surrogate pair (even though the chance + // is tiny). Because it is not possible for user code to re-slice `data` in such a way that + // a missing surrogate can be produced (and also because of the UTF-8 validation above), + // write the missing surrogate out now. + // Buffering it would mean we have to lie about the number of bytes written. + let first_char_remaining = utf16[written]; + if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF { // low surrogate + // We just hope this works, and give up otherwise + let _ = write_u16s(handle, &utf16[written..written]); + written += 1; + } + // Calculate the number of bytes of `utf8` that were actually written. + let mut count = 0; + for ch in utf16[..written].iter() { + count += match ch { + 0x0000 ..= 0x007F => 1, + 0x0080 ..= 0x07FF => 2, + 0xDCEE ..= 0xDFFF => 1, // Low surrogate. We already counted 3 bytes for the other. + _ => 3, + }; + } + Ok(count) + } +} + +fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result { let mut written = 0; cvt(unsafe { c::WriteConsoleW(handle, - utf16.as_ptr() as c::LPCVOID, - utf16.len() as u32, + data.as_ptr() as c::LPCVOID, + data.len() as u32, &mut written, ptr::null_mut()) })?; - - // FIXME if this only partially writes the utf16 buffer then we need to - // figure out how many bytes of `data` were actually written - assert_eq!(written as usize, utf16.len()); - Ok(utf8.len()) + Ok(written as usize) } impl Stdin { pub fn new() -> io::Result { - Ok(Stdin { - utf8: Mutex::new(Cursor::new(Vec::new())), - }) + Ok(Stdin { high_surrogate: Cell::new(0) }) } pub fn read(&self, buf: &mut [u8]) -> io::Result { - let handle = match get(c::STD_INPUT_HANDLE)? { - Output::Console(c) => c, - Output::Pipe(p) => { - let handle = Handle::new(p); - let ret = handle.read(buf); - handle.into_raw(); - return ret - } + let handle = get_handle(c::STD_INPUT_HANDLE)?; + if !is_console(handle) { + let handle = Handle::new(handle); + let ret = handle.read(buf); + handle.into_raw(); // Don't close the handle + return ret; + } + + if buf.len() == 0 { + return Ok(0); + } else if buf.len() < 4 { + return Err(io::Error::new(io::ErrorKind::InvalidInput, + "Windows stdin in console mode does not support a buffer too small to; \ + guarantee holding one arbitrary UTF-8 character (4 bytes)")) + } + + let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2]; + // In the worst case, an UTF-8 string can take 3 bytes for every `u16` of an UTF-16. So + // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets + // lost. + let amount = cmp::min(buf.len() / 3, utf16_buf.len()); + let read = self.read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount)?; + let utf16 = &utf16_buf[..read]; + + // FIXME: it would be nice if we could directly decode into the buffer instead of doing an + // allocation. + let data = match String::from_utf16(&utf16) { + Ok(utf8) => utf8.into_bytes(), + Err(..) => { + // We can't really do any better than forget all data and return an error. + return Err(io::Error::new(io::ErrorKind::InvalidData, + "Windows stdin in console mode does not support non-UTF-16 input; \ + encountered unpaired surrogate")) + }, }; - let mut utf8 = self.utf8.lock().unwrap(); - // Read more if the buffer is empty - if utf8.position() as usize == utf8.get_ref().len() { - let mut utf16 = vec![0u16; 0x1000]; - let mut num = 0; - let mut input_control = readconsole_input_control(CTRL_Z_MASK); - cvt(unsafe { - c::ReadConsoleW(handle, - utf16.as_mut_ptr() as c::LPVOID, - utf16.len() as u32, - &mut num, - &mut input_control as c::PCONSOLE_READCONSOLE_CONTROL) - })?; - utf16.truncate(num as usize); - // FIXME: what to do about this data that has already been read? - let mut data = match String::from_utf16(&utf16) { - Ok(utf8) => utf8.into_bytes(), - Err(..) => return Err(invalid_encoding()), - }; - if let Some(&last_byte) = data.last() { - if last_byte == CTRL_Z { - data.pop(); - } + buf.copy_from_slice(&data); + Ok(data.len()) + } + + // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our + // buffer size, and keep it around for the next read hoping to put them together. + // This is a best effort, and may not work if we are not the only reader on Stdin. + pub fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize) + -> io::Result + { + // Insert possibly remaining unpaired surrogate from last read. + let mut start = 0; + if self.high_surrogate.get() != 0 { + buf[0] = self.high_surrogate.replace(0); + start = 1; + if amount == 1 { + // Special case: `Stdin::read` guarantees we can always read at least one new `u16` + // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least + // 4 bytes. + amount = 2; + } + } + let mut amount = read_u16s(handle, &mut buf[start..amount])? + start; + + if amount > 0 { + let last_char = buf[amount - 1]; + if last_char >= 0xD800 && last_char <= 0xDBFF { // high surrogate + self.high_surrogate.set(last_char); + amount -= 1; } - *utf8 = Cursor::new(data); } + Ok(amount) + } +} + +fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result { + // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the + // traditional DOS method to indicate end of character stream / user input (SUB). + // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole. + const CTRL_Z: u16 = 0x1A; + const CTRL_Z_MASK: c::ULONG = 1 << CTRL_Z; + let mut input_control = c::CONSOLE_READCONSOLE_CONTROL { + nLength: ::mem::size_of::() as c::ULONG, + nInitialChars: 0, + dwCtrlWakeupMask: CTRL_Z_MASK, + dwControlKeyState: 0, + }; + + let mut amount = 0; + cvt(unsafe { + c::ReadConsoleW(handle, + buf.as_mut_ptr() as c::LPVOID, + buf.len() as u32, + &mut amount, + &mut input_control as c::PCONSOLE_READCONSOLE_CONTROL) + })?; - // MemReader shouldn't error here since we just filled it - utf8.read(buf) + if amount > 0 && buf[amount as usize - 1] == CTRL_Z { + amount -= 1; } + Ok(amount as usize) } impl Stdout { @@ -156,43 +251,10 @@ impl Stderr { } } -impl Output { - pub fn handle(&self) -> c::HANDLE { - match *self { - Output::Console(c) => c, - Output::Pipe(c) => c, - } - } -} - -fn invalid_encoding() -> io::Error { - io::Error::new(io::ErrorKind::InvalidData, - "Windows stdio in console mode does not support non-UTF-8 byte sequences; \ - see https://github.com/rust-lang/rust/issues/23344") -} - -fn readconsole_input_control(wakeup_mask: c::ULONG) -> c::CONSOLE_READCONSOLE_CONTROL { - c::CONSOLE_READCONSOLE_CONTROL { - nLength: ::mem::size_of::() as c::ULONG, - nInitialChars: 0, - dwCtrlWakeupMask: wakeup_mask, - dwControlKeyState: 0, - } -} - -const CTRL_Z: u8 = 0x1A; -const CTRL_Z_MASK: c::ULONG = 0x4000000; //1 << 0x1A - pub fn is_ebadf(err: &io::Error) -> bool { err.raw_os_error() == Some(c::ERROR_INVALID_HANDLE as i32) } -// The default buffer capacity is 64k, but apparently windows -// doesn't like 64k reads on stdin. See #13304 for details, but the -// idea is that on windows we use a slightly smaller buffer that's -// been seen to be acceptable. -pub const STDIN_BUF_SIZE: usize = 8 * 1024; - pub fn panic_output() -> Option { io::stderr_raw().ok() } From b09803e8694b3ad83f988e30b4f0a3903ebe2632 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 19 Feb 2019 15:57:59 +0100 Subject: [PATCH 4/5] Address review comments --- src/libstd/sys/windows/stdio.rs | 58 ++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/src/libstd/sys/windows/stdio.rs b/src/libstd/sys/windows/stdio.rs index ccfe0ced82fbc..6b9b36f6a924b 100644 --- a/src/libstd/sys/windows/stdio.rs +++ b/src/libstd/sys/windows/stdio.rs @@ -1,6 +1,7 @@ #![unstable(issue = "0", feature = "windows_stdio")] use cell::Cell; +use char::decode_utf16; use cmp; use io; use ptr; @@ -64,22 +65,27 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result { // // If the data is not valid UTF-8 we write out as many bytes as are valid. // Only when there are no valid bytes (which will happen on the next call), return an error. - let len = cmp::min(data.len(), MAX_BUFFER_SIZE); + let len = cmp::min(data.len(), MAX_BUFFER_SIZE / 2); let utf8 = match str::from_utf8(&data[..len]) { Ok(s) => s, Err(ref e) if e.valid_up_to() == 0 => { return Err(io::Error::new(io::ErrorKind::InvalidData, - "Windows stdio in console mode does not support non-UTF-8 byte sequences; \ - see https://github.com/rust-lang/rust/issues/23344")) + "Windows stdio in console mode does not support writing non-UTF-8 byte sequences")) }, Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(), }; - let utf16 = utf8.encode_utf16().collect::>(); + let mut utf16 = [0u16; MAX_BUFFER_SIZE / 2]; + let mut len_utf16 = 0; + for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) { + *dest = chr; + len_utf16 += 1; + } + let utf16 = &utf16[..len_utf16]; let mut written = write_u16s(handle, &utf16)?; // Figure out how many bytes of as UTF-8 were written away as UTF-16. - if written >= utf16.len() { + if written == utf16.len() { Ok(utf8.len()) } else { // Make sure we didn't end up writing only half of a surrogate pair (even though the chance @@ -90,7 +96,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result { let first_char_remaining = utf16[written]; if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF { // low surrogate // We just hope this works, and give up otherwise - let _ = write_u16s(handle, &utf16[written..written]); + let _ = write_u16s(handle, &utf16[written..written+1]); written += 1; } // Calculate the number of bytes of `utf8` that were actually written. @@ -103,6 +109,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result { _ => 3, }; } + debug_assert!(String::from_utf16(&utf16[..written]).unwrap() == utf8[..count]); Ok(count) } } @@ -137,7 +144,7 @@ impl Stdin { return Ok(0); } else if buf.len() < 4 { return Err(io::Error::new(io::ErrorKind::InvalidInput, - "Windows stdin in console mode does not support a buffer too small to; \ + "Windows stdin in console mode does not support a buffer too small to \ guarantee holding one arbitrary UTF-8 character (4 bytes)")) } @@ -147,27 +154,14 @@ impl Stdin { // lost. let amount = cmp::min(buf.len() / 3, utf16_buf.len()); let read = self.read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount)?; - let utf16 = &utf16_buf[..read]; - // FIXME: it would be nice if we could directly decode into the buffer instead of doing an - // allocation. - let data = match String::from_utf16(&utf16) { - Ok(utf8) => utf8.into_bytes(), - Err(..) => { - // We can't really do any better than forget all data and return an error. - return Err(io::Error::new(io::ErrorKind::InvalidData, - "Windows stdin in console mode does not support non-UTF-16 input; \ - encountered unpaired surrogate")) - }, - }; - buf.copy_from_slice(&data); - Ok(data.len()) + utf16_to_utf8(&utf16_buf[..read], buf) } // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our // buffer size, and keep it around for the next read hoping to put them together. // This is a best effort, and may not work if we are not the only reader on Stdin. - pub fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize) + fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize) -> io::Result { // Insert possibly remaining unpaired surrogate from last read. @@ -223,6 +217,26 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result { Ok(amount as usize) } +#[allow(unused)] +fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result { + let mut written = 0; + for chr in decode_utf16(utf16.iter().cloned()) { + match chr { + Ok(chr) => { + chr.encode_utf8(&mut utf8[written..]); + written += chr.len_utf8(); + } + Err(_) => { + // We can't really do any better than forget all data and return an error. + return Err(io::Error::new(io::ErrorKind::InvalidData, + "Windows stdin in console mode does not support non-UTF-16 input; \ + encountered unpaired surrogate")) + } + } + } + Ok(written) +} + impl Stdout { pub fn new() -> io::Result { Ok(Stdout) From 6464e32ea97fe0b18f75becc82cba9b19dfe453c Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Wed, 20 Feb 2019 08:18:29 +0100 Subject: [PATCH 5/5] Use standard Read/Write traits in sys::stdio --- src/libstd/sys/cloudabi/stdio.rs | 16 +++++-- src/libstd/sys/redox/stdio.rs | 22 +++++---- src/libstd/sys/sgx/stdio.rs | 22 +++++---- src/libstd/sys/unix/stdio.rs | 22 +++++---- src/libstd/sys/wasm/stdio.rs | 26 ++++++---- src/libstd/sys/windows/stdio.rs | 81 ++++++++++++++++++-------------- 6 files changed, 114 insertions(+), 75 deletions(-) diff --git a/src/libstd/sys/cloudabi/stdio.rs b/src/libstd/sys/cloudabi/stdio.rs index 8ec92326dbd8c..81d79213f615c 100644 --- a/src/libstd/sys/cloudabi/stdio.rs +++ b/src/libstd/sys/cloudabi/stdio.rs @@ -9,8 +9,10 @@ impl Stdin { pub fn new() -> io::Result { Ok(Stdin(())) } +} - pub fn read(&self, _: &mut [u8]) -> io::Result { +impl io::Read for Stdin { + fn read(&mut self, _buf: &mut [u8]) -> io::Result { Ok(0) } } @@ -19,15 +21,17 @@ impl Stdout { pub fn new() -> io::Result { Ok(Stdout(())) } +} - pub fn write(&self, _: &[u8]) -> io::Result { +impl io::Write for Stdout { + fn write(&mut self, _buf: &[u8]) -> io::Result { Err(io::Error::new( io::ErrorKind::BrokenPipe, "Stdout is not connected to any output in this environment", )) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } @@ -36,15 +40,17 @@ impl Stderr { pub fn new() -> io::Result { Ok(Stderr(())) } +} - pub fn write(&self, _: &[u8]) -> io::Result { +impl io::Write for Stderr { + fn write(&mut self, _buf: &[u8]) -> io::Result { Err(io::Error::new( io::ErrorKind::BrokenPipe, "Stderr is not connected to any output in this environment", )) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } diff --git a/src/libstd/sys/redox/stdio.rs b/src/libstd/sys/redox/stdio.rs index 66f84e1752752..b4eb01fd6cc52 100644 --- a/src/libstd/sys/redox/stdio.rs +++ b/src/libstd/sys/redox/stdio.rs @@ -8,10 +8,12 @@ pub struct Stderr(()); impl Stdin { pub fn new() -> io::Result { Ok(Stdin(())) } +} - pub fn read(&self, data: &mut [u8]) -> io::Result { +impl io::Read for Stdin { + fn read(&mut self, buf: &mut [u8]) -> io::Result { let fd = FileDesc::new(0); - let ret = fd.read(data); + let ret = fd.read(buf); fd.into_raw(); ret } @@ -19,30 +21,34 @@ impl Stdin { impl Stdout { pub fn new() -> io::Result { Ok(Stdout(())) } +} - pub fn write(&self, data: &[u8]) -> io::Result { +impl io::Write for Stdout { + fn write(&mut self, buf: &[u8]) -> io::Result { let fd = FileDesc::new(1); - let ret = fd.write(data); + let ret = fd.write(buf); fd.into_raw(); ret } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { cvt(syscall::fsync(1)).and(Ok(())) } } impl Stderr { pub fn new() -> io::Result { Ok(Stderr(())) } +} - pub fn write(&self, data: &[u8]) -> io::Result { +impl io::Write for Stderr { + fn write(&mut self, buf: &[u8]) -> io::Result { let fd = FileDesc::new(2); - let ret = fd.write(data); + let ret = fd.write(buf); fd.into_raw(); ret } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { cvt(syscall::fsync(2)).and(Ok(())) } } diff --git a/src/libstd/sys/sgx/stdio.rs b/src/libstd/sys/sgx/stdio.rs index c8efb931d6fe5..57d66ed9a853c 100644 --- a/src/libstd/sys/sgx/stdio.rs +++ b/src/libstd/sys/sgx/stdio.rs @@ -16,32 +16,38 @@ fn with_std_fd R, R>(fd: abi::Fd, f: F) -> R { impl Stdin { pub fn new() -> io::Result { Ok(Stdin(())) } +} - pub fn read(&self, data: &mut [u8]) -> io::Result { - with_std_fd(abi::FD_STDIN, |fd| fd.read(data)) +impl io::Read for Stdin { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + with_std_fd(abi::FD_STDIN, |fd| fd.read(buf)) } } impl Stdout { pub fn new() -> io::Result { Ok(Stdout(())) } +} - pub fn write(&self, data: &[u8]) -> io::Result { - with_std_fd(abi::FD_STDOUT, |fd| fd.write(data)) +impl io::Write for Stdout { + fn write(&mut self, buf: &[u8]) -> io::Result { + with_std_fd(abi::FD_STDOUT, |fd| fd.write(buf)) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { with_std_fd(abi::FD_STDOUT, |fd| fd.flush()) } } impl Stderr { pub fn new() -> io::Result { Ok(Stderr(())) } +} - pub fn write(&self, data: &[u8]) -> io::Result { - with_std_fd(abi::FD_STDERR, |fd| fd.write(data)) +impl io::Write for Stderr { + fn write(&mut self, buf: &[u8]) -> io::Result { + with_std_fd(abi::FD_STDERR, |fd| fd.write(buf)) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { with_std_fd(abi::FD_STDERR, |fd| fd.flush()) } } diff --git a/src/libstd/sys/unix/stdio.rs b/src/libstd/sys/unix/stdio.rs index e23723222be35..82bd2fbf77612 100644 --- a/src/libstd/sys/unix/stdio.rs +++ b/src/libstd/sys/unix/stdio.rs @@ -8,10 +8,12 @@ pub struct Stderr(()); impl Stdin { pub fn new() -> io::Result { Ok(Stdin(())) } +} - pub fn read(&self, data: &mut [u8]) -> io::Result { +impl io::Read for Stdin { + fn read(&mut self, buf: &mut [u8]) -> io::Result { let fd = FileDesc::new(libc::STDIN_FILENO); - let ret = fd.read(data); + let ret = fd.read(buf); fd.into_raw(); // do not close this FD ret } @@ -19,30 +21,34 @@ impl Stdin { impl Stdout { pub fn new() -> io::Result { Ok(Stdout(())) } +} - pub fn write(&self, data: &[u8]) -> io::Result { +impl io::Write for Stdout { + fn write(&mut self, buf: &[u8]) -> io::Result { let fd = FileDesc::new(libc::STDOUT_FILENO); - let ret = fd.write(data); + let ret = fd.write(buf); fd.into_raw(); // do not close this FD ret } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } impl Stderr { pub fn new() -> io::Result { Ok(Stderr(())) } +} - pub fn write(&self, data: &[u8]) -> io::Result { +impl io::Write for Stderr { + fn write(&mut self, buf: &[u8]) -> io::Result { let fd = FileDesc::new(libc::STDERR_FILENO); - let ret = fd.write(data); + let ret = fd.write(buf); fd.into_raw(); // do not close this FD ret } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } diff --git a/src/libstd/sys/wasm/stdio.rs b/src/libstd/sys/wasm/stdio.rs index 015d3f2066042..657655004e9bf 100644 --- a/src/libstd/sys/wasm/stdio.rs +++ b/src/libstd/sys/wasm/stdio.rs @@ -9,9 +9,11 @@ impl Stdin { pub fn new() -> io::Result { Ok(Stdin) } +} - pub fn read(&self, data: &mut [u8]) -> io::Result { - Ok(ReadSysCall::perform(0, data)) +impl io::Read for Stdin { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + Ok(ReadSysCall::perform(0, buf)) } } @@ -19,13 +21,15 @@ impl Stdout { pub fn new() -> io::Result { Ok(Stdout) } +} - pub fn write(&self, data: &[u8]) -> io::Result { - WriteSysCall::perform(1, data); - Ok(data.len()) +impl io::Write for Stdout { + fn write(&mut self, buf: &[u8]) -> io::Result { + WriteSysCall::perform(1, buf); + Ok(buf.len()) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } @@ -34,13 +38,15 @@ impl Stderr { pub fn new() -> io::Result { Ok(Stderr) } +} - pub fn write(&self, data: &[u8]) -> io::Result { - WriteSysCall::perform(2, data); - Ok(data.len()) +impl io::Write for Stderr { + fn write(&mut self, buf: &[u8]) -> io::Result { + WriteSysCall::perform(2, buf); + Ok(buf.len()) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } diff --git a/src/libstd/sys/windows/stdio.rs b/src/libstd/sys/windows/stdio.rs index 6b9b36f6a924b..5963541a89338 100644 --- a/src/libstd/sys/windows/stdio.rs +++ b/src/libstd/sys/windows/stdio.rs @@ -1,6 +1,5 @@ #![unstable(issue = "0", feature = "windows_stdio")] -use cell::Cell; use char::decode_utf16; use cmp; use io; @@ -13,7 +12,7 @@ use sys::handle::Handle; // Don't cache handles but get them fresh for every read/write. This allows us to track changes to // the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490. pub struct Stdin { - high_surrogate: Cell, + surrogate: u16, } pub struct Stdout; pub struct Stderr; @@ -128,10 +127,12 @@ fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result { impl Stdin { pub fn new() -> io::Result { - Ok(Stdin { high_surrogate: Cell::new(0) }) + Ok(Stdin { surrogate: 0 }) } +} - pub fn read(&self, buf: &mut [u8]) -> io::Result { +impl io::Read for Stdin { + fn read(&mut self, buf: &mut [u8]) -> io::Result { let handle = get_handle(c::STD_INPUT_HANDLE)?; if !is_console(handle) { let handle = Handle::new(handle); @@ -153,40 +154,44 @@ impl Stdin { // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets // lost. let amount = cmp::min(buf.len() / 3, utf16_buf.len()); - let read = self.read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount)?; + let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?; utf16_to_utf8(&utf16_buf[..read], buf) } +} - // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our - // buffer size, and keep it around for the next read hoping to put them together. - // This is a best effort, and may not work if we are not the only reader on Stdin. - fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize) - -> io::Result - { - // Insert possibly remaining unpaired surrogate from last read. - let mut start = 0; - if self.high_surrogate.get() != 0 { - buf[0] = self.high_surrogate.replace(0); - start = 1; - if amount == 1 { - // Special case: `Stdin::read` guarantees we can always read at least one new `u16` - // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least - // 4 bytes. - amount = 2; - } + +// We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our +// buffer size, and keep it around for the next read hoping to put them together. +// This is a best effort, and may not work if we are not the only reader on Stdin. +fn read_u16s_fixup_surrogates(handle: c::HANDLE, + buf: &mut [u16], + mut amount: usize, + surrogate: &mut u16) -> io::Result +{ + // Insert possibly remaining unpaired surrogate from last read. + let mut start = 0; + if *surrogate != 0 { + buf[0] = *surrogate; + *surrogate = 0; + start = 1; + if amount == 1 { + // Special case: `Stdin::read` guarantees we can always read at least one new `u16` + // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least + // 4 bytes. + amount = 2; } - let mut amount = read_u16s(handle, &mut buf[start..amount])? + start; + } + let mut amount = read_u16s(handle, &mut buf[start..amount])? + start; - if amount > 0 { - let last_char = buf[amount - 1]; - if last_char >= 0xD800 && last_char <= 0xDBFF { // high surrogate - self.high_surrogate.set(last_char); - amount -= 1; - } + if amount > 0 { + let last_char = buf[amount - 1]; + if last_char >= 0xD800 && last_char <= 0xDBFF { // high surrogate + *surrogate = last_char; + amount -= 1; } - Ok(amount) } + Ok(amount) } fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result { @@ -241,12 +246,14 @@ impl Stdout { pub fn new() -> io::Result { Ok(Stdout) } +} - pub fn write(&self, data: &[u8]) -> io::Result { - write(c::STD_OUTPUT_HANDLE, data) +impl io::Write for Stdout { + fn write(&mut self, buf: &[u8]) -> io::Result { + write(c::STD_OUTPUT_HANDLE, buf) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } } @@ -255,12 +262,14 @@ impl Stderr { pub fn new() -> io::Result { Ok(Stderr) } +} - pub fn write(&self, data: &[u8]) -> io::Result { - write(c::STD_ERROR_HANDLE, data) +impl io::Write for Stderr { + fn write(&mut self, buf: &[u8]) -> io::Result { + write(c::STD_ERROR_HANDLE, buf) } - pub fn flush(&self) -> io::Result<()> { + fn flush(&mut self) -> io::Result<()> { Ok(()) } }