Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some LTO fixes. #8349

Merged
merged 1 commit into from
Jun 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/cargo/core/compiler/crate_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ impl CrateType {
}
}

/// Returns `true` when this crate type is one that LTO can be performed on.
///
/// Only `Bin`, `Staticlib` and `Cdylib` qualify; every other crate type
/// (including `Other`) yields `false`.
pub fn can_lto(&self) -> bool {
    match self {
        // Crate types that LTO cannot be performed on.
        CrateType::Lib
        | CrateType::Rlib
        | CrateType::Dylib
        | CrateType::ProcMacro
        | CrateType::Other(..) => false,
        // Crate types that LTO can be performed on.
        CrateType::Bin | CrateType::Staticlib | CrateType::Cdylib => true,
    }
}

pub fn is_linkable(&self) -> bool {
match self {
CrateType::Lib | CrateType::Rlib | CrateType::Dylib | CrateType::ProcMacro => true,
Expand Down
185 changes: 123 additions & 62 deletions src/cargo/core/compiler/lto.rs
Original file line number Diff line number Diff line change
@@ -1,131 +1,192 @@
use crate::core::compiler::{Context, Unit};
use crate::core::compiler::{CompileMode, Context, CrateType, Unit};
use crate::core::interning::InternedString;
use crate::core::profiles;
use crate::core::TargetKind;

use crate::util::errors::CargoResult;
use std::collections::hash_map::{Entry, HashMap};

/// Possible ways to run rustc and request various parts of LTO.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
///
/// Variant | Flag | Object Code | Bitcode
/// -------------------|------------------------|-------------|--------
/// `Run` | `-C lto=foo` | n/a | n/a
/// `Off` | `-C lto=off` | n/a | n/a
/// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓
/// `ObjectAndBitcode` | | ✓ | ✓
/// `OnlyObject` | `-C embed-bitcode=no` | ✓ |
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Lto {
/// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional.
/// LTO is run for this rustc, and it's `-Clto=foo`. If the given value is
/// None, that corresponds to `-Clto` with no argument, which means do
/// "fat" LTO.
Run(Option<InternedString>),

/// This rustc invocation only needs to produce bitcode, there's no need to
/// produce object files, so we can pass `-Clinker-plugin-lto`
/// LTO has been explicitly listed as "off". This means no thin-local-LTO,
/// no LTO anywhere, I really mean it!
Off,

/// This rustc invocation only needs to produce bitcode (it is *only* used
/// for LTO), there's no need to produce object files, so we can pass
/// `-Clinker-plugin-lto`
OnlyBitcode,

/// This rustc invocation needs to embed bitcode in object files. This means
/// that object files may be used for a normal link, and the crate may be
/// loaded for LTO later, so both are required.
EmbedBitcode,
ObjectAndBitcode,

/// Nothing related to LTO is required of this compilation.
None,
/// This should not include bitcode. This is primarily to reduce disk
/// space usage.
OnlyObject,
}

pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> {
let mut map = HashMap::new();
for unit in cx.bcx.roots.iter() {
calculate(cx, &mut map, unit, Lto::None)?;
let root_lto = match unit.profile.lto {
// LTO not requested, no need for bitcode.
profiles::Lto::Bool(false) | profiles::Lto::Off => Lto::OnlyObject,
_ => {
let crate_types = unit.target.rustc_crate_types();
if unit.target.for_host() {
Lto::OnlyObject
} else if needs_object(&crate_types) {
lto_when_needs_object(&crate_types)
} else {
// This may or may not participate in LTO, let's start
// with the minimum requirements. This may be expanded in
// `calculate` below if necessary.
Lto::OnlyBitcode
}
}
};
calculate(cx, &mut map, unit, root_lto)?;
}
cx.lto = map;
Ok(())
}

/// Returns `true` if at least one of `crate_types` needs object code.
///
/// A crate type counts as needing object code when it `can_lto()` or when
/// it `is_dynamic()`. An empty slice yields `false`.
fn needs_object(crate_types: &[CrateType]) -> bool {
    crate_types
        .iter()
        .any(|crate_type| crate_type.can_lto() || crate_type.is_dynamic())
}

/// Picks the `Lto` mode for a unit that is already known to need object code.
///
/// Returns `Lto::ObjectAndBitcode` when any of `crate_types` can be LTO'd,
/// and `Lto::OnlyObject` otherwise.
fn lto_when_needs_object(crate_types: &[CrateType]) -> Lto {
    let any_can_lto = crate_types.iter().any(|crate_type| crate_type.can_lto());
    if !any_can_lto {
        // A dylib whose parent is running LTO. rustc currently doesn't
        // support LTO with dylibs, so there is no point in emitting bitcode.
        return Lto::OnlyObject;
    }
    // A mixed rlib/cdylib whose parent is running LTO. Both outputs are
    // required: bitcode in the rlib (for LTO), and object code for the
    // cdylib.
    Lto::ObjectAndBitcode
}

fn calculate(
cx: &Context<'_, '_>,
map: &mut HashMap<Unit, Lto>,
unit: &Unit,
lto_for_deps: Lto,
parent_lto: Lto,
) -> CargoResult<()> {
let (lto, lto_for_deps) = if unit.target.for_host() {
let crate_types = match unit.mode {
// Note: Doctest ignores LTO, but for now we'll compute it as-if it is
// a Bin, in case it is ever supported in the future.
CompileMode::Test | CompileMode::Bench | CompileMode::Doctest => vec![CrateType::Bin],
// Notes on other modes:
// - Check: Treat as the underlying type, it doesn't really matter.
// - Doc: LTO is N/A for the Doc unit itself since rustdoc does not
// support codegen flags. We still compute the dependencies, which
// are mostly `Check`.
// - RunCustomBuild is ignored because it is always "for_host".
_ => unit.target.rustc_crate_types(),
};
// LTO can only be performed if *all* of the crate types support it.
// For example, a cdylib/rlib combination won't allow LTO.
let all_lto_types = crate_types.iter().all(CrateType::can_lto);
// Compute the LTO based on the profile, and what our parent requires.
let lto = if unit.target.for_host() {
// Disable LTO for host builds since we only really want to perform LTO
// for the final binary, and LTO on plugins/build scripts/proc macros is
// largely not desired.
(Lto::None, Lto::None)
} else if unit.target.is_linkable() {
// A "linkable" target is one that produces an rlib or dylib in this
// case. In this scenario we cannot pass `-Clto` to the compiler because
// that is an invalid request, this is simply a dependency. What we do,
// however, is respect the request for whatever dependencies need to
// have.
//
// Here if no LTO is requested then we keep it turned off. Otherwise LTO
// is requested in some form, which means ideally we need just what's
// requested, nothing else. It's possible, though, to have libraries
// which are both a cdylib and an rlib, for example, which means that
// object files are getting sent to the linker. That means that we need
// to fully embed bitcode rather than simply generating just bitcode.
let has_non_linkable_lib = match unit.target.kind() {
TargetKind::Lib(kinds) => kinds.iter().any(|k| !k.is_linkable()),
_ => true,
};
match lto_for_deps {
Lto::None => (Lto::None, Lto::None),
_ if has_non_linkable_lib => (Lto::EmbedBitcode, Lto::EmbedBitcode),
other => (other, other),
Lto::OnlyObject
} else if all_lto_types {
// Note that this ignores the `parent_lto` because this isn't a
// linkable crate type; this unit is not being embedded in the parent.
match unit.profile.lto {
profiles::Lto::Named(s) => Lto::Run(Some(s)),
profiles::Lto::Off => Lto::Off,
profiles::Lto::Bool(true) => Lto::Run(None),
profiles::Lto::Bool(false) => Lto::OnlyObject,
}
} else {
// Otherwise this target can perform LTO and we're going to read the
// LTO value out of the profile. Note that we ignore `lto_for_deps`
// here because if a unit depends on another unit that can LTO this
// isn't a rustc-level dependency but rather a Cargo-level dependency.
// For example this is an integration test depending on a binary.
match unit.profile.lto {
profiles::Lto::Named(s) => match s.as_str() {
"n" | "no" | "off" => (Lto::Run(Some(s)), Lto::None),
_ => (Lto::Run(Some(s)), Lto::OnlyBitcode),
},
profiles::Lto::Bool(true) => (Lto::Run(None), Lto::OnlyBitcode),
profiles::Lto::Bool(false) => (Lto::None, Lto::None),
match (parent_lto, needs_object(&crate_types)) {
// An rlib whose parent is running LTO, we only need bitcode.
(Lto::Run(_), false) => Lto::OnlyBitcode,
// LTO when something needs object code.
(Lto::Run(_), true) | (Lto::OnlyBitcode, true) => lto_when_needs_object(&crate_types),
// LTO is disabled, no need for bitcode.
(Lto::Off, _) => Lto::OnlyObject,
// If this doesn't have any requirements, or the requirements are
// already satisfied, then stay with our parent.
(_, false) | (Lto::OnlyObject, true) | (Lto::ObjectAndBitcode, true) => parent_lto,
}
};

match map.entry(unit.clone()) {
// Merge the computed LTO. If this unit appears multiple times in the
// graph, the merge may expand the requirements.
let merged_lto = match map.entry(unit.clone()) {
// If we haven't seen this unit before then insert our value and keep
// going.
Entry::Vacant(v) => {
v.insert(lto);
}
Entry::Vacant(v) => *v.insert(lto),

Entry::Occupied(mut v) => {
let result = match (lto, v.get()) {
// No change in requirements.
(Lto::OnlyBitcode, Lto::OnlyBitcode) => Lto::OnlyBitcode,
(Lto::OnlyObject, Lto::OnlyObject) => Lto::OnlyObject,

// Once we're running LTO we keep running LTO. We should always
// calculate the same thing here each iteration because if we
// see this twice then it means, for example, two unit tests
// depend on a binary, which is normal.
(Lto::Run(s), _) | (_, &Lto::Run(s)) => Lto::Run(s),

// If we calculated the same thing as before then we can bail
// out quickly.
(Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()),
// Off means off! This has the same reasoning as `Lto::Run`.
(Lto::Off, _) | (_, Lto::Off) => Lto::Off,

// Once a target has requested both, that's the maximal amount
// of work that can be done, so we just keep doing that work.
(Lto::ObjectAndBitcode, _) | (_, Lto::ObjectAndBitcode) => Lto::ObjectAndBitcode,

// Upgrade so that both requirements can be met.
//
// This is where the trickiness happens. This unit needs
// bitcode and the previously calculated value for this unit
// says it didn't need bitcode (or vice versa). This means that
// we're a shared dependency between some targets which require
// LTO and some which don't. This means that instead of being
// either only-objects or only-bitcode we have to embed both in
// rlibs (used for different compilations), so we switch to
// embedding bitcode.
(Lto::OnlyBitcode, Lto::None) | (Lto::None, Lto::OnlyBitcode) => Lto::EmbedBitcode,

// Once a target has requested bitcode embedding that's the
// maximal amount of work that can be done, so we just keep
// doing that work.
(Lto::EmbedBitcode, _) | (_, Lto::EmbedBitcode) => Lto::EmbedBitcode,
// including both.
(Lto::OnlyObject, Lto::OnlyBitcode) | (Lto::OnlyBitcode, Lto::OnlyObject) => {
Lto::ObjectAndBitcode
}
};
// No need to recurse if we calculated the same value as before.
if result == *v.get() {
return Ok(());
}
v.insert(result);
result
}
}
};

for dep in cx.unit_deps(unit) {
calculate(cx, map, &dep.unit, lto_for_deps)?;
calculate(cx, map, &dep.unit, merged_lto)?;
}
Ok(())
}
8 changes: 6 additions & 2 deletions src/cargo/core/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub use self::job::Freshness;
use self::job::{Job, Work};
use self::job_queue::{JobQueue, JobState};
pub(crate) use self::layout::Layout;
pub use self::lto::Lto;
use self::output_depinfo::output_depinfo;
use self::unit_graph::UnitDep;
pub use crate::core::compiler::unit::{Unit, UnitInterner};
Expand Down Expand Up @@ -787,7 +788,10 @@ fn build_base_args(
lto::Lto::Run(Some(s)) => {
cmd.arg("-C").arg(format!("lto={}", s));
}
lto::Lto::EmbedBitcode => {} // this is rustc's default
lto::Lto::Off => {
cmd.arg("-C").arg("lto=off");
}
lto::Lto::ObjectAndBitcode => {} // this is rustc's default
lto::Lto::OnlyBitcode => {
// Note that this compiler flag, like the one below, is just an
// optimization in terms of build time. If we don't pass it then
Expand All @@ -804,7 +808,7 @@ fn build_base_args(
cmd.arg("-Clinker-plugin-lto");
}
}
lto::Lto::None => {
lto::Lto::OnlyObject => {
if cx
.bcx
.target_data
Expand Down
8 changes: 7 additions & 1 deletion src/cargo/core/profiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,9 @@ fn merge_profile(profile: &mut Profile, toml: &TomlProfile) {
}
match toml.lto {
Some(StringOrBool::Bool(b)) => profile.lto = Lto::Bool(b),
Some(StringOrBool::String(ref n)) if matches!(n.as_str(), "off" | "n" | "no") => {
profile.lto = Lto::Off
}
Some(StringOrBool::String(ref n)) => profile.lto = Lto::Named(InternedString::new(n)),
None => {}
}
Expand Down Expand Up @@ -747,8 +750,10 @@ impl Profile {
/// The link-time-optimization setting.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
pub enum Lto {
/// False = no LTO
/// Explicitly no LTO, disables thin-local LTO.
Off,
/// True = "Fat" LTO
/// False = rustc default (no args), currently "thin-local LTO"
Bool(bool),
/// Named LTO settings like "thin".
Named(InternedString),
Expand All @@ -760,6 +765,7 @@ impl serde::ser::Serialize for Lto {
S: serde::ser::Serializer,
{
match self {
Lto::Off => "off".serialize(s),
Lto::Bool(b) => b.to_string().serialize(s),
Lto::Named(n) => n.serialize(s),
}
Expand Down
Loading