From c74b3060372cb1057f5623a2d09d8b1599c133f8 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 11 Jan 2020 17:57:18 +0100 Subject: [PATCH] Move all cold code to the end of the function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #836 Benchmark #1: simple-raytracer/raytracer_cg_clif Time (mean ± σ): 9.250 s ± 0.056 s [User: 9.213 s, System: 0.015 s] Range (min … max): 9.151 s … 9.348 s 20 runs Benchmark #2: simple-raytracer/raytracer_cg_clif_cold_separated Time (mean ± σ): 9.179 s ± 0.101 s [User: 9.141 s, System: 0.016 s] Range (min … max): 9.070 s … 9.473 s 20 runs Summary 'simple-raytracer/raytracer_cg_clif_cold_separated' ran 1.01 ± 0.01 times faster than 'simple-raytracer/raytracer_cg_clif' --- src/abi/mod.rs | 1 + src/base.rs | 15 ++++++++++++--- src/common.rs | 3 +++ src/lib.rs | 1 + src/optimize/code_layout.rs | 34 ++++++++++++++++++++++++++++++++++ src/optimize/mod.rs | 5 +++++ src/optimize/stack2reg.rs | 1 - 7 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 src/optimize/code_layout.rs diff --git a/src/abi/mod.rs b/src/abi/mod.rs index e9c9a751c26d2..c983daabdc101 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -506,6 +506,7 @@ fn codegen_call_inner<'tcx>( args: Vec>, ret_place: Option>, ) { + // FIXME mark the current ebb as cold when calling a `#[cold]` function. let fn_sig = fx .tcx .normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), &fn_ty.fn_sig(fx.tcx)); diff --git a/src/base.rs b/src/base.rs index 6dc9f92775f10..db10443df1e33 100644 --- a/src/base.rs +++ b/src/base.rs @@ -32,6 +32,12 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( // Predefine ebb's let start_ebb = bcx.create_ebb(); let ebb_map: IndexVec = (0..mir.basic_blocks().len()).map(|_| bcx.create_ebb()).collect(); + let mut cold_ebbs = EntitySet::new(); + for (bb, &ebb) in ebb_map.iter_enumerated() { + if mir.basic_blocks()[bb].is_cleanup { + cold_ebbs.insert(ebb); + } + } // Make FunctionCx let pointer_type = cx.module.target_config().pointer_type(); @@ -49,6 +55,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( ebb_map, local_map: HashMap::new(), caller_location: None, // set by `codegen_fn_prelude` + cold_ebbs, clif_comments, constants_cx: &mut cx.constants_cx, @@ -73,6 +80,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( let mut clif_comments = fx.clif_comments; let source_info_set = fx.source_info_set; let local_map = fx.local_map; + let cold_ebbs = fx.cold_ebbs; #[cfg(debug_assertions)] crate::pretty_clif::write_clif_file(cx.tcx, "unopt", instance, &context.func, &clif_comments, None); @@ -82,7 +90,7 @@ pub fn trans_fn<'clif, 'tcx, B: Backend + 'static>( // Perform rust specific optimizations tcx.sess.time("optimize clif ir", || { - crate::optimize::optimize_function(tcx, instance, context, &mut clif_comments); + crate::optimize::optimize_function(tcx, instance, context, &cold_ebbs, &mut clif_comments); }); // Define function @@ -191,8 +199,11 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) { } } let cond = trans_operand(fx, cond).load_scalar(fx); + let target = fx.get_ebb(*target); let failure = fx.bcx.create_ebb(); + fx.cold_ebbs.insert(failure); + if *expected { fx.bcx.ins().brz(cond, failure, &[]); } else { @@ -200,8 +211,6 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, impl Backend>) { }; fx.bcx.ins().jump(target, &[]); - // FIXME insert bb after all other bb's to reduce the amount of jumps in the common - // case and improve code locality. fx.bcx.switch_to_block(failure); trap_panic( fx, diff --git a/src/common.rs b/src/common.rs index 66e690d31222c..201467c62a009 100644 --- a/src/common.rs +++ b/src/common.rs @@ -270,6 +270,9 @@ pub struct FunctionCx<'clif, 'tcx, B: Backend + 'static> { /// When `#[track_caller]` is used, the implicit caller location is stored in this variable. pub caller_location: Option>, + /// See [crate::optimize::code_layout] for more information. + pub cold_ebbs: EntitySet, + pub clif_comments: crate::pretty_clif::CommentWriter, pub constants_cx: &'clif mut crate::constant::ConstantCx, pub vtables: &'clif mut HashMap<(Ty<'tcx>, Option>), DataId>, diff --git a/src/lib.rs b/src/lib.rs index 0c84f1d67045b..e4eb4147ce3e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -95,6 +95,7 @@ mod prelude { pub use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleKind}; pub use cranelift_codegen::Context; + pub use cranelift_codegen::entity::EntitySet; pub use cranelift_codegen::ir::{AbiParam, Ebb, ExternalName, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot, StackSlotData, StackSlotKind, TrapCode, Type, Value}; pub use cranelift_codegen::ir::condcodes::{FloatCC, IntCC}; pub use cranelift_codegen::ir::function::Function; diff --git a/src/optimize/code_layout.rs b/src/optimize/code_layout.rs new file mode 100644 index 0000000000000..947b61d42e0b9 --- /dev/null +++ b/src/optimize/code_layout.rs @@ -0,0 +1,34 @@ +//! This optimization moves cold code to the end of the function. +//! +//! Some code is executed much less often than other code. For example panicking or the +//! landingpads for unwinding. By moving this cold code to the end of the function the average +//! amount of jumps is reduced and the code locality is improved. +//! +//! # Undefined behaviour +//! +//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself. + +use crate::prelude::*; + +pub fn optimize_function(ctx: &mut Context, cold_ebbs: &EntitySet) { + // FIXME Move the ebb in place instead of remove and append once + // bytecodealliance/cranelift#1339 is implemented. + + let mut ebb_insts = HashMap::new(); + for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) { + let insts = ctx.func.layout.ebb_insts(ebb).collect::>(); + for &inst in &insts { + ctx.func.layout.remove_inst(inst); + } + ebb_insts.insert(ebb, insts); + ctx.func.layout.remove_ebb(ebb); + } + + // And then append them at the back again. + for ebb in cold_ebbs.keys().filter(|&ebb| cold_ebbs.contains(ebb)) { + ctx.func.layout.append_ebb(ebb); + for inst in ebb_insts.remove(&ebb).unwrap() { + ctx.func.layout.append_inst(inst, ebb); + } + } +} diff --git a/src/optimize/mod.rs b/src/optimize/mod.rs index 59e4d2dd47d1e..ba9839e84be12 100644 --- a/src/optimize/mod.rs +++ b/src/optimize/mod.rs @@ -1,13 +1,18 @@ use crate::prelude::*; +mod code_layout; mod stack2reg; pub fn optimize_function<'tcx>( tcx: TyCtxt<'tcx>, instance: Instance<'tcx>, ctx: &mut Context, + cold_ebbs: &EntitySet, clif_comments: &mut crate::pretty_clif::CommentWriter, ) { + // The code_layout optimization is very cheap. + self::code_layout::optimize_function(ctx, cold_ebbs); + if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No { return; // FIXME classify optimizations over opt levels } diff --git a/src/optimize/stack2reg.rs b/src/optimize/stack2reg.rs index 4762b40db8dd9..b1afa9ab8bb15 100644 --- a/src/optimize/stack2reg.rs +++ b/src/optimize/stack2reg.rs @@ -13,7 +13,6 @@ use std::collections::{BTreeMap, HashSet}; use std::ops::Not; use cranelift_codegen::cursor::{Cursor, FuncCursor}; -use cranelift_codegen::entity::EntitySet; use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef}; use cranelift_codegen::ir::immediates::Offset32;