From 6e53e66bd3b700b873ba93b30fac7c065954d46f Mon Sep 17 00:00:00 2001 From: h1467792822 <1467792822@qq.com> Date: Tue, 5 Dec 2023 12:42:57 +0800 Subject: [PATCH] MCP #705: Provide the option `-Csymbol-mangling-version=hashed -Z unstable-options` to shorten symbol names by replacing them with a digest. Enrich test cases --- compiler/rustc_session/src/config.rs | 15 +++ compiler/rustc_session/src/options.rs | 6 +- compiler/rustc_symbol_mangling/src/hashed.rs | 43 ++++++++ compiler/rustc_symbol_mangling/src/lib.rs | 4 + compiler/rustc_symbol_mangling/src/v0.rs | 104 ++++++++++-------- .../run-make/symbol-mangling-hashed/Makefile | 48 ++++++++ .../symbol-mangling-hashed/a_dylib.rs | 4 + .../run-make/symbol-mangling-hashed/a_rlib.rs | 5 + .../run-make/symbol-mangling-hashed/b_bin.rs | 9 ++ .../symbol-mangling-hashed/b_dylib.rs | 9 ++ .../bad-value.bad.stderr | 2 +- .../bad-value.blank.stderr | 2 +- .../bad-value.no-value.stderr | 2 +- .../unstable.hashed.stderr | 2 + tests/ui/symbol-mangling-version/unstable.rs | 5 +- 15 files changed, 211 insertions(+), 49 deletions(-) create mode 100644 compiler/rustc_symbol_mangling/src/hashed.rs create mode 100644 tests/run-make/symbol-mangling-hashed/Makefile create mode 100644 tests/run-make/symbol-mangling-hashed/a_dylib.rs create mode 100644 tests/run-make/symbol-mangling-hashed/a_rlib.rs create mode 100644 tests/run-make/symbol-mangling-hashed/b_bin.rs create mode 100644 tests/run-make/symbol-mangling-hashed/b_dylib.rs create mode 100644 tests/ui/symbol-mangling-version/unstable.hashed.stderr diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index e751ff13a34ca..2ec1a726cef1f 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -347,6 +347,7 @@ impl SwitchWithOptPath { pub enum SymbolManglingVersion { Legacy, V0, + Hashed, } #[derive(Clone, Copy, Debug, PartialEq, Hash)] @@ -2692,6 +2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M match cg.symbol_mangling_version { // Stable values: None | Some(SymbolManglingVersion::V0) => {} + // Unstable values: Some(SymbolManglingVersion::Legacy) => { if !unstable_opts.unstable_options { @@ -2700,6 +2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M ); } } + Some(SymbolManglingVersion::Hashed) => { + if !unstable_opts.unstable_options { + early_dcx.early_fatal( + "`-C symbol-mangling-version=hashed` requires `-Z unstable-options`", + ); + } + } } // Check for unstable values of `-C instrument-coverage`. @@ -2741,6 +2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M ); } Some(SymbolManglingVersion::V0) => {} + Some(SymbolManglingVersion::Hashed) => { + early_dcx.early_warn( + "-C instrument-coverage requires symbol mangling version `v0`, \ + but `-C symbol-mangling-version=hashed` was specified", + ); + } } } diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 486b6d4bf2e14..d8d201d5f244d 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -407,7 +407,8 @@ mod desc { pub const parse_switch_with_opt_path: &str = "an optional path to the profiling data output directory"; pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`"; - pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)"; + pub const parse_symbol_mangling_version: &str = + "one of: `legacy`, `v0` (RFC 2603), or `hashed`"; pub const parse_src_file_hash: &str = "either `md5` or `sha1`"; pub const parse_relocation_model: &str = "one of supported relocation models (`rustc --print relocation-models`)"; @@ -1180,6 +1181,7 @@ mod parse { *slot = match v { Some("legacy") => Some(SymbolManglingVersion::Legacy), Some("v0") => Some(SymbolManglingVersion::V0), + Some("hashed") => Some(SymbolManglingVersion::Hashed), _ => return false, }; true @@ -1504,7 +1506,7 @@ options! { "tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"), symbol_mangling_version: Option = (None, parse_symbol_mangling_version, [TRACKED], - "which mangling version to use for symbol names ('legacy' (default) or 'v0')"), + "which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"), target_cpu: Option = (None, parse_opt_string, [TRACKED], "select target processor (`rustc --print target-cpus` for details)"), target_feature: String = (String::new(), parse_target_feature, [TRACKED], diff --git a/compiler/rustc_symbol_mangling/src/hashed.rs b/compiler/rustc_symbol_mangling/src/hashed.rs new file mode 100644 index 0000000000000..d4cd6161ac040 --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/hashed.rs @@ -0,0 +1,43 @@ +use crate::v0; +use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher}; +use rustc_hir::def_id::CrateNum; +use rustc_middle::ty::{Instance, TyCtxt}; + +use std::fmt::Write; + +pub(super) fn mangle<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + instantiating_crate: Option, + full_mangling_name: impl FnOnce() -> String, +) -> String { + // The symbol of a generic function may be scattered in multiple downstream dylibs. + // If the symbol of a generic function still contains `crate name`, hash conflicts between the + // generic funcion and other symbols of the same `crate` cannot be detected in time during + // construction. This symbol conflict is left over until it occurs during run time. + // In this case, `instantiating-crate name` is used to replace `crate name` can completely + // eliminate the risk of the preceding potential hash conflict. + let crate_num = + if let Some(krate) = instantiating_crate { krate } else { instance.def_id().krate }; + + let mut symbol = "_RNxC".to_string(); + v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol); + + let hash = tcx.with_stable_hashing_context(|mut hcx| { + let mut hasher = StableHasher::new(); + full_mangling_name().hash_stable(&mut hcx, &mut hasher); + hasher.finish::().as_u64() + }); + + push_hash64(hash, &mut symbol); + + symbol +} + +// The hash is encoded based on `base-62` and the final terminator `_` is removed because it does +// not help prevent hash collisions +fn push_hash64(hash: u64, output: &mut String) { + let hash = v0::encode_integer_62(hash); + let hash_len = hash.len(); + let _ = write!(output, "{hash_len}H{}", &hash[..hash_len - 1]); +} diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index 8c035ba948b15..bf4ea0003abb4 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -111,6 +111,7 @@ use rustc_middle::query::Providers; use rustc_middle::ty::{self, Instance, TyCtxt}; use rustc_session::config::SymbolManglingVersion; +mod hashed; mod legacy; mod v0; @@ -265,6 +266,9 @@ fn compute_symbol_name<'tcx>( let symbol = match mangling_version { SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate), SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate), + SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || { + v0::mangle(tcx, instance, instantiating_crate) + }), }; debug_assert!( diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs index e89a640767f22..16ebda55a7a51 100644 --- a/compiler/rustc_symbol_mangling/src/v0.rs +++ b/compiler/rustc_symbol_mangling/src/v0.rs @@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> { /// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`, /// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc. fn push_integer_62(&mut self, x: u64) { - if let Some(x) = x.checked_sub(1) { - base_n::push_str(x as u128, 62, &mut self.out); - } - self.push("_"); + push_integer_62(x, &mut self.out) } /// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is: @@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> { } fn push_ident(&mut self, ident: &str) { - let mut use_punycode = false; - for b in ident.bytes() { - match b { - b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {} - 0x80..=0xff => use_punycode = true, - _ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident), - } - } - - let punycode_string; - let ident = if use_punycode { - self.push("u"); - - // FIXME(eddyb) we should probably roll our own punycode implementation. - let mut punycode_bytes = match punycode::encode(ident) { - Ok(s) => s.into_bytes(), - Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident), - }; - - // Replace `-` with `_`. - if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') { - *c = b'_'; - } - - // FIXME(eddyb) avoid rechecking UTF-8 validity. - punycode_string = String::from_utf8(punycode_bytes).unwrap(); - &punycode_string - } else { - ident - }; - - let _ = write!(self.out, "{}", ident.len()); - - // Write a separating `_` if necessary (leading digit or `_`). - if let Some('_' | '0'..='9') = ident.chars().next() { - self.push("_"); - } - - self.push(ident); + push_ident(ident, &mut self.out) } fn path_append_ns( @@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> { Ok(()) } } +/// Push a `_`-terminated base 62 integer, using the format +/// specified in the RFC as ``, that is: +/// * `x = 0` is encoded as just the `"_"` terminator +/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`, +/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc. +pub(crate) fn push_integer_62(x: u64, output: &mut String) { + if let Some(x) = x.checked_sub(1) { + base_n::push_str(x as u128, 62, output); + } + output.push('_'); +} + +pub(crate) fn encode_integer_62(x: u64) -> String { + let mut output = String::new(); + push_integer_62(x, &mut output); + output +} + +pub(crate) fn push_ident(ident: &str, output: &mut String) { + let mut use_punycode = false; + for b in ident.bytes() { + match b { + b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {} + 0x80..=0xff => use_punycode = true, + _ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident), + } + } + + let punycode_string; + let ident = if use_punycode { + output.push('u'); + + // FIXME(eddyb) we should probably roll our own punycode implementation. + let mut punycode_bytes = match punycode::encode(ident) { + Ok(s) => s.into_bytes(), + Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident), + }; + + // Replace `-` with `_`. + if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') { + *c = b'_'; + } + + // FIXME(eddyb) avoid rechecking UTF-8 validity. + punycode_string = String::from_utf8(punycode_bytes).unwrap(); + &punycode_string + } else { + ident + }; + + let _ = write!(output, "{}", ident.len()); + + // Write a separating `_` if necessary (leading digit or `_`). + if let Some('_' | '0'..='9') = ident.chars().next() { + output.push('_'); + } + + output.push_str(ident); +} diff --git a/tests/run-make/symbol-mangling-hashed/Makefile b/tests/run-make/symbol-mangling-hashed/Makefile new file mode 100644 index 0000000000000..68894b2192aba --- /dev/null +++ b/tests/run-make/symbol-mangling-hashed/Makefile @@ -0,0 +1,48 @@ +include ../tools.mk + +# ignore-cross-compile +# only-linux +# only-x86_64 + +NM=nm -D +RLIB_NAME=liba_rlib.rlib +DYLIB_NAME=liba_dylib.so +SO_NAME=libb_dylib.so +BIN_NAME=b_bin + +ifeq ($(UNAME),Darwin) +NM=nm -gU +RLIB_NAME=liba_rlib.rlib +DYLIB_NAME=liba_dylib.dylib +SO_NAME=libb_dylib.dylib +BIN_NAME=b_bin +endif + +ifdef IS_WINDOWS +NM=nm -g +RLIB_NAME=liba_rlib.dll.a +DYLIB_NAME=liba_dylib.dll +SO_NAME=libb_dylib.dll +BIN_NAME=b_bin.exe +endif + +all: + $(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs + $(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs + $(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs + $(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs + + # Check hashed symbol name + + [ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ] + [ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ] + + [ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ] + [ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ] + [ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ] + + [ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ] + [ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ] + [ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ] + + $(call RUN,$(BIN_NAME)) diff --git a/tests/run-make/symbol-mangling-hashed/a_dylib.rs b/tests/run-make/symbol-mangling-hashed/a_dylib.rs new file mode 100644 index 0000000000000..8aec8fd82a53f --- /dev/null +++ b/tests/run-make/symbol-mangling-hashed/a_dylib.rs @@ -0,0 +1,4 @@ +#![crate_type="dylib"] +pub fn hello() { + println!("hello dylib"); +} diff --git a/tests/run-make/symbol-mangling-hashed/a_rlib.rs b/tests/run-make/symbol-mangling-hashed/a_rlib.rs new file mode 100644 index 0000000000000..873c86c5d0b4c --- /dev/null +++ b/tests/run-make/symbol-mangling-hashed/a_rlib.rs @@ -0,0 +1,5 @@ +#![crate_type="rlib"] + +pub fn hello() { + println!("hello rlib"); +} diff --git a/tests/run-make/symbol-mangling-hashed/b_bin.rs b/tests/run-make/symbol-mangling-hashed/b_bin.rs new file mode 100644 index 0000000000000..bcc53c37e122a --- /dev/null +++ b/tests/run-make/symbol-mangling-hashed/b_bin.rs @@ -0,0 +1,9 @@ +extern crate a_rlib; +extern crate a_dylib; +extern crate b_dylib; + +fn main() { + a_rlib::hello(); + a_dylib::hello(); + b_dylib::hello(); +} diff --git a/tests/run-make/symbol-mangling-hashed/b_dylib.rs b/tests/run-make/symbol-mangling-hashed/b_dylib.rs new file mode 100644 index 0000000000000..c26a04b39ec32 --- /dev/null +++ b/tests/run-make/symbol-mangling-hashed/b_dylib.rs @@ -0,0 +1,9 @@ +#![crate_type="dylib"] + +extern crate a_rlib; +extern crate a_dylib; + +pub fn hello() { + a_rlib::hello(); + a_dylib::hello(); +} diff --git a/tests/ui/symbol-mangling-version/bad-value.bad.stderr b/tests/ui/symbol-mangling-version/bad-value.bad.stderr index c36c73c6069f1..a12e5e241c022 100644 --- a/tests/ui/symbol-mangling-version/bad-value.bad.stderr +++ b/tests/ui/symbol-mangling-version/bad-value.bad.stderr @@ -1,2 +1,2 @@ -error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected +error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected diff --git a/tests/ui/symbol-mangling-version/bad-value.blank.stderr b/tests/ui/symbol-mangling-version/bad-value.blank.stderr index 0e70af5b8ffbe..95456587781e6 100644 --- a/tests/ui/symbol-mangling-version/bad-value.blank.stderr +++ b/tests/ui/symbol-mangling-version/bad-value.blank.stderr @@ -1,2 +1,2 @@ -error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected +error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected diff --git a/tests/ui/symbol-mangling-version/bad-value.no-value.stderr b/tests/ui/symbol-mangling-version/bad-value.no-value.stderr index 77013b72b6c1d..325e47a281fae 100644 --- a/tests/ui/symbol-mangling-version/bad-value.no-value.stderr +++ b/tests/ui/symbol-mangling-version/bad-value.no-value.stderr @@ -1,2 +1,2 @@ -error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=) +error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=) diff --git a/tests/ui/symbol-mangling-version/unstable.hashed.stderr b/tests/ui/symbol-mangling-version/unstable.hashed.stderr new file mode 100644 index 0000000000000..f2ae18290f261 --- /dev/null +++ b/tests/ui/symbol-mangling-version/unstable.hashed.stderr @@ -0,0 +1,2 @@ +error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options` + diff --git a/tests/ui/symbol-mangling-version/unstable.rs b/tests/ui/symbol-mangling-version/unstable.rs index df87a39cdfbd1..42750a64574dc 100644 --- a/tests/ui/symbol-mangling-version/unstable.rs +++ b/tests/ui/symbol-mangling-version/unstable.rs @@ -1,6 +1,9 @@ -// revisions: legacy legacy-ok +// revisions: legacy legacy-ok hashed hashed-ok // [legacy] compile-flags: -Csymbol-mangling-version=legacy // [legacy-ok] check-pass // [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy +// [hashed] compile-flags: -Csymbol-mangling-version=hashed +// [hashed-ok] check-pass +// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed fn main() {}