Skip to content

Commit

Permalink
Move all cold code to the end of the function
Browse files Browse the repository at this point in the history
Fixes rust-lang#836

Benchmark #1: simple-raytracer/raytracer_cg_clif
  Time (mean ± σ):      9.250 s ±  0.056 s    [User: 9.213 s, System: 0.015 s]
  Range (min … max):    9.151 s …  9.348 s    20 runs

Benchmark #2: simple-raytracer/raytracer_cg_clif_cold_separated
  Time (mean ± σ):      9.179 s ±  0.101 s    [User: 9.141 s, System: 0.016 s]
  Range (min … max):    9.070 s …  9.473 s    20 runs

Summary
  'simple-raytracer/raytracer_cg_clif_cold_separated' ran
    1.01 ± 0.01 times faster than 'simple-raytracer/raytracer_cg_clif'
  • Loading branch information
bjorn3 committed Jan 11, 2020
1 parent 38797f8 commit c74b306
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/abi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ fn codegen_call_inner<'tcx>(
args: Vec<CValue<'tcx>>,
ret_place: Option<CPlace<'tcx>>,
) {
// FIXME mark the current ebb as cold when calling a `#[cold]` function.
let fn_sig = fx
.tcx
.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx));
Expand Down
15 changes: 12 additions & 3 deletions src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
// Predefine ebb's
let start_ebb = bcx.create_ebb();
let ebb_map: IndexVec<BasicBlock, Ebb> = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect();
let mut cold_ebbs = EntitySet::new();
for (bb, &ebb) in ebb_map.iter_enumerated() {
if mir.basic_blocks()[bb].is_cleanup {
cold_ebbs.insert(ebb);
}
}

// Make FunctionCx
let pointer_type = cx.module.target_config().pointer_type();
Expand All @@ -49,6 +55,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
ebb_map,
local_map: HashMap::new(),
caller_location: None, // set by `codegen_fn_prelude`
cold_ebbs,

clif_comments,
constants_cx: &mut cx.constants_cx,
Expand All @@ -73,6 +80,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(
let mut clif_comments = fx.clif_comments;
let source_info_set = fx.source_info_set;
let local_map = fx.local_map;
let cold_ebbs = fx.cold_ebbs;

#[cfg(debug_assertions)]
crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None);
Expand All @@ -82,7 +90,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>(

// Perform rust specific optimizations
tcx.sess.time("optimize clif ir", || {
crate::optimize::optimize_function(tcx, instance, context, &mut clif_comments);
crate::optimize::optimize_function(tcx, instance, context, &cold_ebbs, &mut clif_comments);
});

// Define function
Expand Down Expand Up @@ -191,17 +199,18 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) {
}
}
let cond = trans_operand(fx, cond).load_scalar(fx);

let target = fx.get_ebb(*target);
let failure = fx.bcx.create_ebb();
fx.cold_ebbs.insert(failure);

if *expected {
fx.bcx.ins().brz(cond, failure, &[]);
} else {
fx.bcx.ins().brnz(cond, failure, &[]);
};
fx.bcx.ins().jump(target, &[]);

// FIXME insert bb after all other bb's to reduce the amount of jumps in the common
// case and improve code locality.
fx.bcx.switch_to_block(failure);
trap_panic(
fx,
Expand Down
3 changes: 3 additions & 0 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ pub struct FunctionCx<'clif, 'tcx, B: Backend + 'static> {
/// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
pub caller_location: Option<CValue<'tcx>>,

/// See [crate::optimize::code_layout] for more information.
pub cold_ebbs: EntitySet<Ebb>,

pub clif_comments: crate::pretty_clif::CommentWriter,
pub constants_cx: &'clif mut crate::constant::ConstantCx,
pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), DataId>,
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ mod prelude {
pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind};

pub use cranelift_codegen::Context;
pub use cranelift_codegen::entity::EntitySet;
pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value};
pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
pub use cranelift_codegen::ir::function::Function;
Expand Down
34 changes: 34 additions & 0 deletions src/optimize/code_layout.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//! This optimization moves cold code to the end of the function.
//!
//! Some code is executed much less often than other code. For example panicking or the
//! landingpads for unwinding. By moving this cold code to the end of the function the average
//! amount of jumps is reduced and the code locality is improved.
//!
//! # Undefined behaviour
//!
//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself.

use crate::prelude::*;

pub fn optimize_function(ctx: &mut Context, cold_ebbs: &EntitySet<Ebb>) {
// FIXME Move the ebb in place instead of remove and append once
// bytecodealliance/cranelift#1339 is implemented.

let mut ebb_insts = HashMap::new();
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
let insts = ctx.func.layout.ebb_insts(ebb).collect::<Vec<_>>();
for &inst in &insts {
ctx.func.layout.remove_inst(inst);
}
ebb_insts.insert(ebb, insts);
ctx.func.layout.remove_ebb(ebb);
}

// And then append them at the back again.
for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) {
ctx.func.layout.append_ebb(ebb);
for inst in ebb_insts.remove(&ebb).unwrap() {
ctx.func.layout.append_inst(inst, ebb);
}
}
}
5 changes: 5 additions & 0 deletions src/optimize/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
use crate::prelude::*;

mod code_layout;
mod stack2reg;

pub fn optimize_function<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
ctx: &mut Context,
cold_ebbs: &EntitySet<Ebb>,
clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
// The code_layout optimization is very cheap.
self::code_layout::optimize_function(ctx, cold_ebbs);

if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
return; // FIXME classify optimizations over opt levels
}
Expand Down
1 change: 0 additions & 1 deletion src/optimize/stack2reg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use std::collections::{BTreeMap, HashSet};
use std::ops::Not;

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::entity::EntitySet;
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
use cranelift_codegen::ir::immediates::Offset32;

Expand Down

0 comments on commit c74b306

Please sign in to comment.