Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Unreadable non-UTF-8 output on localized MSVC #110586

Merged
merged 2 commits into from
Apr 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3250,6 +3250,7 @@ dependencies = [
"tempfile",
"thorin-dwp",
"tracing",
"windows 0.46.0",
]

[[package]]
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_codegen_ssa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ libc = "0.2.50"
version = "0.30.1"
default-features = false
features = ["read_core", "elf", "macho", "pe", "unaligned", "archive", "write"]

[target.'cfg(windows)'.dependencies.windows]
version = "0.46.0"
features = ["Win32_Globalization"]
79 changes: 78 additions & 1 deletion compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ fn link_natively<'a>(
if !prog.status.success() {
let mut output = prog.stderr.clone();
output.extend_from_slice(&prog.stdout);
let escaped_output = escape_string(&output);
let escaped_output = escape_linker_output(&output, flavor);
// FIXME: Add UI tests for this error.
let err = errors::LinkingFailed {
linker_path: &linker_path,
Expand Down Expand Up @@ -1049,6 +1049,83 @@ fn escape_string(s: &[u8]) -> String {
}
}

/// Turn raw linker output into a `String` for display in a linking-failure diagnostic.
///
/// This is the variant compiled on non-Windows hosts. The linker flavour is
/// accepted only so the signature matches the Windows variant; it is not
/// inspected, and the bytes are handed straight to `escape_string`.
#[cfg(not(windows))]
fn escape_linker_output(s: &[u8], _flavour: LinkerFlavor) -> String {
escape_string(s)
}

/// Turn raw linker output into a `String` for display in a linking-failure diagnostic.
///
/// This is the variant compiled on Windows hosts. For the real MSVC linker
/// (`LinkerFlavor::Msvc(Lld::No)`) output that is not valid UTF-8 is decoded
/// using the system OEM code page. Every other flavour goes through
/// `escape_string` unchanged.
#[cfg(windows)]
fn escape_linker_output(s: &[u8], flavour: LinkerFlavor) -> String {
    // The OEM code page handling only applies to the actual MSVC linker.
    if flavour != LinkerFlavor::Msvc(Lld::No) {
        return escape_string(s);
    }

    // Valid UTF-8 is passed through verbatim.
    if let Ok(text) = str::from_utf8(s) {
        return text.to_owned();
    }

    // Not UTF-8: try the OEM code page, and fall back to an ASCII-escaped dump
    // when the bytes are not valid for that code page either.
    win::locale_byte_str_to_string(s, win::oem_code_page())
        .unwrap_or_else(|| format!("Non-UTF-8 output: {}", s.escape_ascii()))
}

/// Wrappers around the Windows API.
#[cfg(windows)]
mod win {
    use windows::Win32::Globalization::{
        GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
        LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
    };

    /// Get the Windows system OEM code page. This is most notably the code page
    /// used for link.exe's output.
    ///
    /// Falls back to `CP_OEMCP` when the locale query does not write the
    /// expected number of `u16` units.
    pub fn oem_code_page() -> u32 {
        let mut cp: u32 = 0;
        // We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
        // But the API requires us to pass the data as though it's a [u16] string.
        let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
        // SAFETY: `cp` is a live, initialised `u32` for the whole block, so it is
        // valid for reads and writes of `len` (= 2) `u16`s. `u16` has a weaker
        // alignment requirement than `u32`, and `cp` is not accessed by any other
        // path while the `data` view exists.
        let len_written = unsafe {
            let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
            GetLocaleInfoEx(
                LOCALE_NAME_SYSTEM_DEFAULT,
                LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
                Some(data),
            )
        };
        // Only trust `cp` if the call wrote exactly the whole number.
        if usize::try_from(len_written).ok() == Some(len) { cp } else { CP_OEMCP }
    }

    /// Try to convert a multi-byte string to a UTF-8 string using the given code page
    /// The string does not need to be null terminated.
    ///
    /// This is implemented as a wrapper around `MultiByteToWideChar`.
    /// See <https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar>
    ///
    /// It will fail if the multi-byte string is longer than `i32::MAX` or if it contains
    /// any invalid bytes for the expected encoding. An empty input converts to an
    /// empty string.
    pub fn locale_byte_str_to_string(s: &[u8], code_page: u32) -> Option<String> {
        // `MultiByteToWideChar` rejects a zero length, but an empty byte string
        // is trivially valid in every code page, so answer it directly.
        if s.is_empty() {
            return Some(String::new());
        }
        // `MultiByteToWideChar` requires the length to be a "positive integer",
        // i.e. it must fit in an `i32`. (A slice can never exceed `isize::MAX`,
        // so checking against that bound would never reject anything.)
        if i32::try_from(s.len()).is_err() {
            return None;
        }
        // Error if the string is not valid for the expected code page.
        let flags = MB_ERR_INVALID_CHARS;

        // Call MultiByteToWideChar twice.
        // First to calculate the length then to convert the string.
        // SAFETY: the input is a valid slice whose length was checked to fit in
        // an `i32`; no output buffer is supplied for the sizing call.
        let needed = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
        // Zero (or anything non-positive) signals failure.
        let needed = usize::try_from(needed).ok().filter(|&n| n > 0)?;

        let mut utf16 = vec![0u16; needed];
        // SAFETY: as above; `utf16` is a valid, exclusively borrowed buffer of
        // exactly the size reported by the sizing call.
        let written = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
        let written = usize::try_from(written).ok().filter(|&n| n > 0)?;

        // `get` guards against the API reporting more units than the buffer holds.
        utf16.get(..written).map(String::from_utf16_lossy)
    }
}

fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) {
// On macOS the runtimes are distributed as dylibs which should be linked to
// both executables and dynamic shared objects. Everywhere else the runtimes
Expand Down
6 changes: 6 additions & 0 deletions tests/ui/native-library-link-flags/msvc-non-utf8-output.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// build-fail
// compile-flags:-C link-arg=märchenhaft
// only-msvc
// error-pattern:= note: LINK : fatal error LNK1181:
// normalize-stderr-test "(\s*\|\n)\s*= note: .*\n" -> "$1"

// A non-ASCII linker argument is passed on purpose so that the link step fails;
// the expected stderr shows the LNK1181 message with the name rendered readably.
pub fn main() {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
error: linking with `link.exe` failed: exit code: 1181
|
= note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'


error: aborting due to previous error