wasmtime: Remove redundant epoch check on function entry (#8853)
The epoch interruption implementation caches the current deadline in a
register, and avoids reloading that cache until the cached deadline has
passed.

However, the first epoch check happens immediately after the cache has
been populated on function entry, so there's never a reason to reload
the cache at that point. It only needs to be reloaded in loops. So this
commit eliminates the double-check on function entry.

When Cranelift optimizations are enabled, the alias analysis pass
correctly detected that this load was redundant, and the egraph pass
optimized away the `icmp` as well. However, since we don't do
conditional constant propagation, the branch couldn't be optimized away.
On x86 this lowered to a redundant `cmp`/`jae` pair of instructions in
every function entry, which this commit eliminates.

To keep track of what code we're generating for epoch interruptions,
I've also added disas tests with a trivial infinite loop.
jameysharp authored Jun 20, 2024
1 parent 67afe4d commit e29d56e
Showing 3 changed files with 131 additions and 46 deletions.
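
For context: epoch interruption works by having the embedder periodically increment an engine-wide epoch counter, which compiled code compares against a per-store deadline on function entry and at every loop backedge, calling out to the runtime once the deadline has passed. Below is a minimal sketch of the embedder side using Wasmtime's public API (a sketch only; the timing and error handling are illustrative, not part of this commit):

    use std::{thread, time::Duration};
    use wasmtime::{Config, Engine, Instance, Module, Store};

    fn main() -> wasmtime::Result<()> {
        let mut config = Config::new();
        config.epoch_interruption(true);
        let engine = Engine::new(&config)?;

        // Same shape as the disas tests below: a function that loops forever.
        let module = Module::new(&engine, r#"(module (func (export "run") (loop (br 0))))"#)?;

        let mut store = Store::new(&engine, ());
        // Trap as soon as the epoch advances past the current one.
        store.set_epoch_deadline(1);

        // Bump the epoch from another thread; the loop's backedge check notices.
        let engine2 = engine.clone();
        thread::spawn(move || {
            thread::sleep(Duration::from_millis(10));
            engine2.increment_epoch();
        });

        let instance = Instance::new(&mut store, &module, &[])?;
        let run = instance.get_typed_func::<(), ()>(&mut store, "run")?;
        let err = run.call(&mut store, ()).unwrap_err();
        println!("wasm was interrupted: {err:#}");
        Ok(())
    }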
91 changes: 45 additions & 46 deletions crates/cranelift/src/func_environ.rs
@@ -510,7 +510,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
 
     fn epoch_function_entry(&mut self, builder: &mut FunctionBuilder<'_>) {
         builder.declare_var(self.epoch_deadline_var, ir::types::I64);
-        self.epoch_load_deadline_into_var(builder);
+        // Let epoch_check_full load the current deadline and call def_var
+
         builder.declare_var(self.epoch_ptr_var, self.pointer_type());
         let epoch_ptr = self.epoch_ptr(builder);
         builder.def_var(self.epoch_ptr_var, epoch_ptr);
@@ -532,7 +533,9 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
         // sufficient. Then, combined with checks at every backedge
         // (loop) the longest runtime between checks is bounded by the
         // straightline length of any function body.
-        self.epoch_check(builder);
+        let continuation_block = builder.create_block();
+        let cur_epoch_value = self.epoch_load_current(builder);
+        self.epoch_check_full(builder, cur_epoch_value, continuation_block);
     }
 
     #[cfg(feature = "wmemcheck")]
@@ -597,33 +600,30 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
         )
     }
 
-    fn epoch_load_deadline_into_var(&mut self, builder: &mut FunctionBuilder<'_>) {
-        let deadline =
-            builder.ins().load(
-                ir::types::I64,
-                ir::MemFlags::trusted(),
-                self.vmruntime_limits_ptr,
-                ir::immediates::Offset32::new(
-                    self.offsets.ptr.vmruntime_limits_epoch_deadline() as i32
-                ),
-            );
-        builder.def_var(self.epoch_deadline_var, deadline);
+    fn epoch_check(&mut self, builder: &mut FunctionBuilder<'_>) {
+        let continuation_block = builder.create_block();
+
+        // Load new epoch and check against the cached deadline.
+        let cur_epoch_value = self.epoch_load_current(builder);
+        self.epoch_check_cached(builder, cur_epoch_value, continuation_block);
+
+        // At this point we've noticed that the epoch has exceeded our
+        // cached deadline. However the real deadline may have been
+        // updated (within another yield) during some function that we
+        // called in the meantime, so reload the cache and check again.
+        self.epoch_check_full(builder, cur_epoch_value, continuation_block);
     }
 
-    fn epoch_check(&mut self, builder: &mut FunctionBuilder<'_>) {
+    fn epoch_check_cached(
+        &mut self,
+        builder: &mut FunctionBuilder,
+        cur_epoch_value: ir::Value,
+        continuation_block: ir::Block,
+    ) {
         let new_epoch_block = builder.create_block();
-        let new_epoch_doublecheck_block = builder.create_block();
-        let continuation_block = builder.create_block();
         builder.set_cold_block(new_epoch_block);
-        builder.set_cold_block(new_epoch_doublecheck_block);
 
         let epoch_deadline = builder.use_var(self.epoch_deadline_var);
-        // Load new epoch and check against cached deadline. The
-        // deadline may be out of date if it was updated (within
-        // another yield) during some function that we called; this is
-        // fine, as we'll reload it and check again before yielding in
-        // the cold path.
-        let cur_epoch_value = self.epoch_load_current(builder);
         let cmp = builder.ins().icmp(
             IntCC::UnsignedGreaterThanOrEqual,
             cur_epoch_value,
@@ -634,31 +634,30 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
             .brif(cmp, new_epoch_block, &[], continuation_block, &[]);
         builder.seal_block(new_epoch_block);
 
-        // In the "new epoch block", we've noticed that the epoch has
-        // exceeded our cached deadline. However the real deadline may
-        // have been moved in the meantime. We keep the cached value
-        // in a register to speed the checks in the common case
-        // (between epoch ticks) but we want to do a precise check
-        // here, on the cold path, by reloading the latest value
-        // first.
         builder.switch_to_block(new_epoch_block);
-        self.epoch_load_deadline_into_var(builder);
-        let fresh_epoch_deadline = builder.use_var(self.epoch_deadline_var);
-        let fresh_cmp = builder.ins().icmp(
-            IntCC::UnsignedGreaterThanOrEqual,
-            cur_epoch_value,
-            fresh_epoch_deadline,
-        );
-        builder.ins().brif(
-            fresh_cmp,
-            new_epoch_doublecheck_block,
-            &[],
-            continuation_block,
-            &[],
-        );
-        builder.seal_block(new_epoch_doublecheck_block);
+    }
+
+    fn epoch_check_full(
+        &mut self,
+        builder: &mut FunctionBuilder,
+        cur_epoch_value: ir::Value,
+        continuation_block: ir::Block,
+    ) {
+        // We keep the deadline cached in a register to speed the checks
+        // in the common case (between epoch ticks) but we want to do a
+        // precise check here by reloading the cache first.
+        let deadline =
+            builder.ins().load(
+                ir::types::I64,
+                ir::MemFlags::trusted(),
+                self.vmruntime_limits_ptr,
+                ir::immediates::Offset32::new(
+                    self.offsets.ptr.vmruntime_limits_epoch_deadline() as i32
+                ),
+            );
+        builder.def_var(self.epoch_deadline_var, deadline);
+        self.epoch_check_cached(builder, cur_epoch_value, continuation_block);
 
-        builder.switch_to_block(new_epoch_doublecheck_block);
         let new_epoch = self.builtin_functions.new_epoch(builder.func);
         let vmctx = self.vmctx_val(&mut builder.cursor());
         // new_epoch() returns the new deadline, so we don't have to
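
Taken together, the two helpers divide the work as follows; here is an ordinary-Rust analogue of the emitted control flow (illustrative only; `new_epoch` stands in for the builtin libcall, which may yield or trap and returns the updated deadline):

    // Illustrative analogue of the emitted checks; not Wasmtime code.
    fn new_epoch() -> u64 {
        u64::MAX
    }

    struct EpochChecks {
        // In generated code this lives in a register (epoch_deadline_var).
        cached_deadline: u64,
    }

    impl EpochChecks {
        // Loop backedges: compare against the cached deadline only; the hot
        // path never reloads the deadline from memory.
        fn check(&mut self, current_epoch: u64, real_deadline: u64) {
            if current_epoch >= self.cached_deadline {
                // Cold path: the cache may be stale, so do the precise check.
                self.check_full(current_epoch, real_deadline);
            }
        }

        // Function entry (and the cold path above): reload the cache from
        // memory, then run the cached check exactly once.
        fn check_full(&mut self, current_epoch: u64, real_deadline: u64) {
            self.cached_deadline = real_deadline;
            if current_epoch >= self.cached_deadline {
                self.cached_deadline = new_epoch();
            }
        }
    }

Before this commit, function entry did the equivalent of a reload followed by check(), so the cold path could reload a cache that had just been populated; calling check_full() directly removes that redundancy.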
39 changes: 39 additions & 0 deletions tests/disas/epoch-interruption-x86.wat
@@ -0,0 +1,39 @@
;;! target = "x86_64"
;;! test = "compile"
;;! flags = ["-Wepoch-interruption=y"]

(module (func (loop (br 0))))

;; wasm[0]::function[0]:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq (%r10), %r10
;; addq $0x30, %r10
;; cmpq %rsp, %r10
;; ja 0x7f
;; 18: subq $0x20, %rsp
;; movq %rbx, (%rsp)
;; movq %r12, 8(%rsp)
;; movq %r13, 0x10(%rsp)
;; movq 8(%rdi), %r12
;; movq 0x20(%rdi), %rbx
;; movq %rdi, %r13
;; movq (%rbx), %r9
;; movq 0x10(%r12), %rax
;; cmpq %rax, %r9
;; jae 0x57
;; 46: movq (%rbx), %rdi
;; cmpq %rax, %rdi
;; jae 0x64
;; jmp 0x46
;; 57: movq %r13, %rdi
;; callq 0xdf
;; jmp 0x46
;; 64: movq 0x10(%r12), %rax
;; cmpq %rax, %rdi
;; jb 0x46
;; 72: movq %r13, %rdi
;; callq 0xdf
;; jmp 0x46
;; 7f: ud2
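
For orientation: in this expected output the entry check now appears exactly once (the cmpq %rax, %r9 / jae 0x57 pair above), and the hot loop is the cached backedge check alone. The excerpt below repeats the loop with my annotations; the annotations are not part of the test expectation:

    ;; 46: movq (%rbx), %rdi   ; load the current epoch via the cached epoch pointer
    ;;     cmpq %rax, %rdi     ; compare against the cached deadline in %rax
    ;;     jae 0x64            ; cold path: reload the deadline at 0x64 and recheck
    ;;     jmp 0x46            ; loop backedge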
47 changes: 47 additions & 0 deletions tests/disas/epoch-interruption.wat
@@ -0,0 +1,47 @@
;;! target = "x86_64"
;;! test = "optimize"
;;! flags = ["-Wepoch-interruption=y"]

(module (func (loop (br 0))))

;; function u0:0(i64 vmctx, i64) tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; gv3 = vmctx
;; sig0 = (i64 vmctx) -> i64 system_v
;; fn0 = colocated u1:16 sig0
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64):
;; @0016 v3 = load.i64 notrap aligned v0+8
;; @0016 v5 = load.i64 notrap aligned v0+32
;; @0016 v6 = load.i64 notrap aligned v5
;; @0016 v7 = load.i64 notrap aligned v3+16
;; @0016 v8 = icmp uge v6, v7
;; @0016 brif v8, block3, block2(v7)
;;
;; block3 cold:
;; @0016 v10 = call fn0(v0)
;; @0016 jump block2(v10)
;;
;; block2(v21: i64):
;; @0017 jump block4(v21)
;;
;; block4(v13: i64):
;; @0017 v12 = load.i64 notrap aligned v5
;; @0017 v14 = icmp uge v12, v13
;; @0017 brif v14, block7, block6(v13)
;;
;; block7 cold:
;; @0017 v15 = load.i64 notrap aligned v3+16
;; @0017 v16 = icmp.i64 uge v12, v15
;; @0017 brif v16, block8, block6(v15)
;;
;; block8 cold:
;; @0017 v18 = call fn0(v0)
;; @0017 jump block6(v18)
;;
;; block6(v22: i64):
;; @0019 jump block4(v22)
;; }
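
Likewise, in the optimized CLIF the loop header carries only the cached compare, with the deadline reload confined to the cold blocks. Annotated excerpt (annotations mine, not part of the expectation):

    ;; block4(v13: i64):                    ; v13 = the cached deadline, in a register
    ;;   v12 = load.i64 notrap aligned v5   ; load the current epoch
    ;;   v14 = icmp uge v12, v13            ; cached check, no deadline load
    ;;   brif v14, block7, block6(v13)      ; block7 (cold) reloads v3+16 and rechecks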
