From 73b65746e81a31c03cbd3751966eb399073f2d9a Mon Sep 17 00:00:00 2001
From: Chris Denton <chris@chrisdenton.dev>
Date: Thu, 20 Apr 2023 08:58:14 +0100
Subject: [PATCH 1/2] Fix unreadable non-UTF-8 output on localized MSVC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #35785 by converting non-UTF-8 linker output to Unicode using the OEM code page.

Before:

```text
  = note: Non-UTF-8 output: LINK : fatal error LNK1181: cannot open input file \'m\x84rchenhaft.obj\'\r\n
```

After:

```text
   = note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'

```

The difference is more dramatic if using a non-ASCII language pack for Visual Studio.
---
 Cargo.lock                                    |  1 +
 compiler/rustc_codegen_ssa/Cargo.toml         |  4 ++
 compiler/rustc_codegen_ssa/src/back/link.rs   | 55 ++++++++++++++++++-
 .../msvc-non-utf8-output.rs                   |  6 ++
 .../msvc-non-utf8-output.stderr               |  7 +++
 5 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 tests/ui/native-library-link-flags/msvc-non-utf8-output.rs
 create mode 100644 tests/ui/native-library-link-flags/msvc-non-utf8-output.stderr
diff --git a/Cargo.lock b/Cargo.lock
index 06a2a36f4552b..cd319574f7977 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3250,6 +3250,7 @@ dependencies = [
  "tempfile",
  "thorin-dwp",
  "tracing",
+ "windows 0.46.0",
 ]
 
 [[package]]
diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml
index a421535c9b46f..4f73b731f5a2a 100644
--- a/compiler/rustc_codegen_ssa/Cargo.toml
+++ b/compiler/rustc_codegen_ssa/Cargo.toml
@@ -49,3 +49,7 @@ libc = "0.2.50"
 version = "0.30.1"
 default-features = false
 features = ["read_core", "elf", "macho", "pe", "unaligned", "archive", "write"]
+
+[target.'cfg(windows)'.dependencies.windows]
+version = "0.46.0"
+features = ["Win32_Globalization"]
diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs
index 02e21e74fadc8..feab57e98208f 100644
--- a/compiler/rustc_codegen_ssa/src/back/link.rs
+++ b/compiler/rustc_codegen_ssa/src/back/link.rs
@@ -857,7 +857,7 @@ fn link_natively<'a>(
             if !prog.status.success() {
                 let mut output = prog.stderr.clone();
                 output.extend_from_slice(&prog.stdout);
-                let escaped_output = escape_string(&output);
+                let escaped_output = escape_linker_output(&output, flavor);
                 // FIXME: Add UI tests for this error.
                 let err = errors::LinkingFailed {
                     linker_path: &linker_path,
@@ -1049,6 +1049,59 @@ fn escape_string(s: &[u8]) -> String {
     }
 }
 
+#[cfg(not(windows))]
+fn escape_linker_output(s: &[u8], _flavour: LinkerFlavor) -> String {
+    escape_string(s)
+}
+
+/// If the output of the msvc linker is not UTF-8 and the host is Windows,
+/// then try to convert the string from the OEM encoding.
+#[cfg(windows)]
+fn escape_linker_output(s: &[u8], flavour: LinkerFlavor) -> String {
+    // This only applies to the actual MSVC linker.
+    if flavour != LinkerFlavor::Msvc(Lld::No) {
+        return escape_string(s);
+    }
+    match str::from_utf8(s) {
+        Ok(s) => return s.to_owned(),
+        Err(_) if s.len() <= i32::MAX as usize => {
+            use windows::Win32::Globalization::{
+                GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
+                LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
+            };
+            // Get the legacy system OEM code page.
+            let code_page = unsafe {
+                let mut cp: u32 = 0;
+                // We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
+                // But the API requires us to pass the data as though it's a [u16] string.
+                let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
+                let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
+                let len_written = GetLocaleInfoEx(
+                    LOCALE_NAME_SYSTEM_DEFAULT,
+                    LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
+                    Some(data),
+                );
+                if len_written as usize == len { cp } else { CP_OEMCP }
+            };
+            // Error if the string is not valid for the expected code page.
+            let flags = MB_ERR_INVALID_CHARS;
+            // Call MultiByteToWideChar twice.
+            // First to calculate the length then to convert the string.
+            let mut len = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
+            if len > 0 {
+                let mut utf16 = vec![0; len as usize];
+                len = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
+                if len > 0 {
+                    return String::from_utf16_lossy(&utf16[..len as usize]);
+                }
+            }
+        }
+        _ => {}
+    };
+    // The string is not UTF-8 and isn't valid for the OEM code page
+    format!("Non-UTF-8 output: {}", s.escape_ascii())
+}
+
 fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) {
     // On macOS the runtimes are distributed as dylibs which should be linked to
     // both executables and dynamic shared objects. Everywhere else the runtimes
diff --git a/tests/ui/native-library-link-flags/msvc-non-utf8-output.rs b/tests/ui/native-library-link-flags/msvc-non-utf8-output.rs
new file mode 100644
index 0000000000000..3fb2842d694cc
--- /dev/null
+++ b/tests/ui/native-library-link-flags/msvc-non-utf8-output.rs
@@ -0,0 +1,6 @@
+// build-fail
+// compile-flags:-C link-arg=märchenhaft
+// only-msvc
+// error-pattern:= note: LINK : fatal error LNK1181:
+// normalize-stderr-test "(\s*\|\n)\s*= note: .*\n" -> "$1"
+pub fn main() {}
diff --git a/tests/ui/native-library-link-flags/msvc-non-utf8-output.stderr b/tests/ui/native-library-link-flags/msvc-non-utf8-output.stderr
new file mode 100644
index 0000000000000..f843aad782c30
--- /dev/null
+++ b/tests/ui/native-library-link-flags/msvc-non-utf8-output.stderr
@@ -0,0 +1,7 @@
+error: linking with `link.exe` failed: exit code: 1181
+   |
+   = note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'
+           
+
+error: aborting due to previous error
+

From 9b9d39e43f3d8723b36f7e4b9ecafa36203fde45 Mon Sep 17 00:00:00 2001
From: Chris Denton <chris@chrisdenton.dev>
Date: Thu, 27 Apr 2023 09:27:23 +0100
Subject: [PATCH 2/2] Abstract `MultiByteToWideChar`

---
 compiler/rustc_codegen_ssa/src/back/link.rs | 90 +++++++++++++--------
 1 file changed, 57 insertions(+), 33 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs
index feab57e98208f..fe21986884f06 100644
--- a/compiler/rustc_codegen_ssa/src/back/link.rs
+++ b/compiler/rustc_codegen_ssa/src/back/link.rs
@@ -1064,42 +1064,66 @@ fn escape_linker_output(s: &[u8], flavour: LinkerFlavor) -> String {
     }
     match str::from_utf8(s) {
         Ok(s) => return s.to_owned(),
-        Err(_) if s.len() <= i32::MAX as usize => {
-            use windows::Win32::Globalization::{
-                GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
-                LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
-            };
-            // Get the legacy system OEM code page.
-            let code_page = unsafe {
-                let mut cp: u32 = 0;
-                // We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
-                // But the API requires us to pass the data as though it's a [u16] string.
-                let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
-                let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
-                let len_written = GetLocaleInfoEx(
-                    LOCALE_NAME_SYSTEM_DEFAULT,
-                    LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
-                    Some(data),
-                );
-                if len_written as usize == len { cp } else { CP_OEMCP }
-            };
-            // Error if the string is not valid for the expected code page.
-            let flags = MB_ERR_INVALID_CHARS;
-            // Call MultiByteToWideChar twice.
-            // First to calculate the length then to convert the string.
-            let mut len = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
+        Err(_) => match win::locale_byte_str_to_string(s, win::oem_code_page()) {
+            Some(s) => s,
+            // The string is not UTF-8 and isn't valid for the OEM code page
+            None => format!("Non-UTF-8 output: {}", s.escape_ascii()),
+        },
+    }
+}
+
+/// Wrappers around the Windows API.
+#[cfg(windows)]
+mod win {
+    use windows::Win32::Globalization::{
+        GetLocaleInfoEx, MultiByteToWideChar, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP,
+        LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_RETURN_NUMBER, MB_ERR_INVALID_CHARS,
+    };
+
+    /// Get the Windows system OEM code page. This is most notably the code page
+    /// used for link.exe's output.
+    pub fn oem_code_page() -> u32 {
+        unsafe {
+            let mut cp: u32 = 0;
+            // SAFETY: `data` is a two-`u16` view of the local `cp`, used only for this call.
+            // `LOCALE_RETURN_NUMBER` yields a u32, but the API takes a [u16] string buffer.
+            let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
+            let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
+            let len_written = GetLocaleInfoEx(
+                LOCALE_NAME_SYSTEM_DEFAULT,
+                LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
+                Some(data),
+            );
+            if len_written as usize == len { cp } else { CP_OEMCP }
+        }
+    }
+    /// Try to convert a multi-byte string to a UTF-8 string using the given code page.
+    /// The string does not need to be null terminated.
+    ///
+    /// This is implemented as a wrapper around `MultiByteToWideChar`.
+    /// See <https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar>
+    ///
+    /// It will fail if the multi-byte string is longer than `i32::MAX` or if it contains
+    /// any invalid bytes for the expected encoding.
+    pub fn locale_byte_str_to_string(s: &[u8], code_page: u32) -> Option<String> {
+        // `MultiByteToWideChar` takes the length as a positive `i32`; reject longer input.
+        if s.len() > i32::MAX as usize {
+            return None;
+        }
+        // Error if the string is not valid for the expected code page.
+        let flags = MB_ERR_INVALID_CHARS;
+        // Call MultiByteToWideChar twice.
+        // First to calculate the length then to convert the string.
+        let mut len = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
+        if len > 0 {
+            let mut utf16 = vec![0; len as usize];
+            len = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
             if len > 0 {
-                let mut utf16 = vec![0; len as usize];
-                len = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
-                if len > 0 {
-                    return String::from_utf16_lossy(&utf16[..len as usize]);
-                }
+                return utf16.get(..len as usize).map(String::from_utf16_lossy);
             }
         }
-        _ => {}
-    };
-    // The string is not UTF-8 and isn't valid for the OEM code page
-    format!("Non-UTF-8 output: {}", s.escape_ascii())
+        None
+    }
 }
 
 fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) {