From 3a33749404b8d453cdcc883d5d69d5b9c3d62dde Mon Sep 17 00:00:00 2001 From: Peter Huene Date: Sat, 4 Jul 2020 15:16:09 -0700 Subject: [PATCH 1/4] Remove 'set frame pointer' unwind code from Windows x64 unwind. This commit removes the "set frame pointer" unwind code and frame pointer information from Windows x64 unwind information. In Windows x64 unwind information, a "frame pointer" is actually the *base address* of the static part of the local frame and would be at some negative offset to RSP upon establishing the frame pointer. Currently Cranelift uses a "traditional" notion of a frame pointer, one that is the highest address in the local frame (i.e. pointing at the previous frame pointer on the stack). Windows x64 unwind doesn't describe such frame pointers and only needs one described if the frame contains a dynamic stack allocation. Fixes #1967. --- cranelift/codegen/src/isa/unwind/winx64.rs | 9 -- cranelift/codegen/src/isa/x86/abi.rs | 3 +- .../codegen/src/isa/x86/unwind/winx64.rs | 111 ++++-------------- .../isa/x86/windows_fastcall_x64_unwind.clif | 62 +++------- 4 files changed, 41 insertions(+), 144 deletions(-) diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 9219d293e2bc..649b0e7ed05b 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -57,10 +57,6 @@ pub(crate) enum UnwindCode { offset: u8, size: u32, }, - SetFramePointer { - offset: u8, - sp_offset: u8, - }, } impl UnwindCode { @@ -69,7 +65,6 @@ impl UnwindCode { PushNonvolatileRegister = 0, LargeStackAlloc = 1, SmallStackAlloc = 2, - SetFramePointer = 3, SaveXmm128 = 8, SaveXmm128Far = 9, } @@ -113,10 +108,6 @@ impl UnwindCode { writer.write_u32::(*size); } } - Self::SetFramePointer { offset, sp_offset } => { - writer.write_u8(*offset); - writer.write_u8((*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8)); - } }; } diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index 02358ba89bf0..6cd1175cd637 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -1071,8 +1071,7 @@ pub fn create_unwind_info( .map(|u| UnwindInfo::SystemV(u)) } CallConv::WindowsFastcall => { - super::unwind::winx64::create_unwind_info(func, isa, Some(RU::rbp.into()))? - .map(|u| UnwindInfo::WindowsX64(u)) + super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u)) } _ => None, }) diff --git a/cranelift/codegen/src/isa/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/x86/unwind/winx64.rs index de35fc696402..79afc27643a0 100644 --- a/cranelift/codegen/src/isa/x86/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/x86/unwind/winx64.rs @@ -13,7 +13,6 @@ use log::warn; pub(crate) fn create_unwind_info( func: &Function, isa: &dyn TargetIsa, - frame_register: Option, ) -> CodegenResult> { // Only Windows fastcall is supported for unwind information if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() { @@ -28,7 +27,6 @@ pub(crate) fn create_unwind_info( let mut prologue_size = 0; let mut unwind_codes = Vec::new(); let mut found_end = false; - let mut xmm_save_count: u8 = 0; for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) { // x64 ABI prologues cannot exceed 255 bytes in length @@ -64,16 +62,6 @@ pub(crate) fn create_unwind_info( _ => {} } } - InstructionData::CopySpecial { src, dst, .. } => { - if let Some(frame_register) = frame_register { - if src == (RU::rsp as RegUnit) && dst == frame_register { - unwind_codes.push(UnwindCode::SetFramePointer { - offset: unwind_offset, - sp_offset: 0, - }); - } - } - } InstructionData::UnaryImm { opcode, imm } => { match opcode { Opcode::Iconst => { @@ -112,7 +100,6 @@ pub(crate) fn create_unwind_info( { // If this is a save of an FPR, record an unwind operation // Note: the stack_offset here is relative to an adjusted SP - // This will be fixed up later to be based on the frame pointer offset if dst == (RU::rsp as RegUnit) && FPR.contains(src) { let offset: i32 = offset.into(); unwind_codes.push(UnwindCode::SaveXmm { @@ -120,8 +107,6 @@ pub(crate) fn create_unwind_info( reg: src as u8, stack_offset: offset as u32, }); - - xmm_save_count += 1; } } } @@ -136,45 +121,11 @@ pub(crate) fn create_unwind_info( assert!(found_end); - // When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame - // register minus some offset, forming a "base address". This attempts to calculate the frame register offset - // while updating the XMM save offsets to be relative from this "base address" rather than RSP. - let mut frame_register_offset = 0; - if frame_register.is_some() && xmm_save_count > 0 { - // Determine the number of 16-byte slots used for all CSRs (including GPRs) - // The "frame register offset" will point at the last slot used (i.e. the last saved FPR) - // Assumption: each FPR is stored at a lower address than the previous one - let mut last_stack_offset = None; - let mut fpr_save_count: u8 = 0; - let mut gpr_push_count: u8 = 0; - for code in unwind_codes.iter_mut() { - match code { - UnwindCode::SaveXmm { stack_offset, .. } => { - if let Some(last) = last_stack_offset { - assert!(last > *stack_offset); - } - last_stack_offset = Some(*stack_offset); - fpr_save_count += 1; - *stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16; - } - UnwindCode::PushRegister { .. } => { - gpr_push_count += 1; - } - _ => {} - } - } - assert_eq!(fpr_save_count, xmm_save_count); - - // Account for alignment space when there's an odd number of GPR pushes - // Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division - frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2); - } - Ok(Some(UnwindInfo { flags: 0, // this assumes cranelift functions have no SEH handlers prologue_size: prologue_size as u8, - frame_register: frame_register.map(|r| GPR.index_of(r) as u8), - frame_register_offset, + frame_register: None, + frame_register_offset: 0, unwind_codes, })) } @@ -201,7 +152,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); assert_eq!( - create_unwind_info(&context.func, &*isa, None).expect("can create unwind info"), + create_unwind_info(&context.func, &*isa).expect("can create unwind info"), None ); } @@ -219,7 +170,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into())) + let unwind = create_unwind_info(&context.func, &*isa) .expect("can create unwind info") .expect("expected unwind info"); @@ -228,17 +179,13 @@ mod tests { UnwindInfo { flags: 0, prologue_size: 9, - frame_register: Some(GPR.index_of(RU::rbp.into()) as u8), + frame_register: None, frame_register_offset: 0, unwind_codes: vec![ UnwindCode::PushRegister { offset: 2, reg: GPR.index_of(RU::rbp.into()) as u8 }, - UnwindCode::SetFramePointer { - offset: 5, - sp_offset: 0 - }, UnwindCode::StackAlloc { offset: 9, size: 64 @@ -247,9 +194,9 @@ mod tests { } ); - assert_eq!(unwind.emit_size(), 12); + assert_eq!(unwind.emit_size(), 8); - let mut buf = [0u8; 12]; + let mut buf = [0u8; 8]; unwind.emit(&mut buf); assert_eq!( @@ -257,16 +204,12 @@ mod tests { [ 0x01, // Version and flags (version 1, no flags) 0x09, // Prologue size - 0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg) - 0x05, // Frame register + offset (RBP with 0 offset) + 0x02, // Unwind code count (1 for stack alloc, 1 for push reg) + 0x00, // Frame register + offset (no frame register) 0x09, // Prolog offset 0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes) - 0x05, // Prolog offset - 0x03, // Operation 3 (save frame register), stack pointer offset = 0 0x02, // Prolog offset 0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP) - 0x00, // Padding byte - 0x00, // Padding byte ] ); } @@ -284,7 +227,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into())) + let unwind = create_unwind_info(&context.func, &*isa) .expect("can create unwind info") .expect("expected unwind info"); @@ -293,17 +236,13 @@ mod tests { UnwindInfo { flags: 0, prologue_size: 27, - frame_register: Some(GPR.index_of(RU::rbp.into()) as u8), + frame_register: None, frame_register_offset: 0, unwind_codes: vec![ UnwindCode::PushRegister { offset: 2, reg: GPR.index_of(RU::rbp.into()) as u8 }, - UnwindCode::SetFramePointer { - offset: 5, - sp_offset: 0 - }, UnwindCode::StackAlloc { offset: 27, size: 10000 @@ -322,16 +261,16 @@ mod tests { [ 0x01, // Version and flags (version 1, no flags) 0x1B, // Prologue size - 0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg) - 0x05, // Frame register + offset (RBP with 0 offset) + 0x03, // Unwind code count (2 for stack alloc, 1 for push reg) + 0x00, // Frame register + offset (no frame register) 0x1B, // Prolog offset 0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0) 0xE2, // Low size byte 0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes) - 0x05, // Prolog offset - 0x03, // Operation 3 (save frame register), stack pointer offset = 0 0x02, // Prolog offset 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) + 0x00, // Padding + 0x00, // Padding ] ); } @@ -349,7 +288,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into())) + let unwind = create_unwind_info(&context.func, &*isa) .expect("can create unwind info") .expect("expected unwind info"); @@ -358,17 +297,13 @@ mod tests { UnwindInfo { flags: 0, prologue_size: 27, - frame_register: Some(GPR.index_of(RU::rbp.into()) as u8), + frame_register: None, frame_register_offset: 0, unwind_codes: vec![ UnwindCode::PushRegister { offset: 2, reg: GPR.index_of(RU::rbp.into()) as u8 }, - UnwindCode::SetFramePointer { - offset: 5, - sp_offset: 0 - }, UnwindCode::StackAlloc { offset: 27, size: 1000000 @@ -377,9 +312,9 @@ mod tests { } ); - assert_eq!(unwind.emit_size(), 16); + assert_eq!(unwind.emit_size(), 12); - let mut buf = [0u8; 16]; + let mut buf = [0u8; 12]; unwind.emit(&mut buf); assert_eq!( @@ -387,20 +322,16 @@ mod tests { [ 0x01, // Version and flags (version 1, no flags) 0x1B, // Prologue size - 0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg) - 0x05, // Frame register + offset (RBP with 0 offset) + 0x04, // Unwind code count (3 for stack alloc, 1 for push reg) + 0x00, // Frame register + offset (no frame register) 0x1B, // Prolog offset 0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1) 0x40, // Byte 1 of size 0x42, // Byte 2 of size 0x0F, // Byte 3 of size 0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes) - 0x05, // Prolog offset - 0x03, // Operation 3 (save frame register), stack pointer offset = 0 0x02, // Prolog offset 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - 0x00, // Padding byte - 0x00, // Padding byte ] ); } diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif index 4e5d4f18f3ea..fcfe12b80b2c 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif @@ -11,17 +11,13 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 4 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 2 +; nextln: unwind codes: 1 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 -; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 ; check the unwind information with a non-leaf function with no args function %no_args() windows_fastcall { @@ -33,18 +29,14 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 8 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 3 +; nextln: unwind codes: 2 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 8 ; nextln: op: SmallStackAlloc ; nextln: info: 3 @@ -58,18 +50,14 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 17 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 3 +; nextln: unwind codes: 2 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 -; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: +; nextln: ; nextln: offset: 17 ; nextln: op: LargeStackAlloc ; nextln: info: 0 @@ -84,18 +72,14 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 17 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 3 +; nextln: unwind codes: 2 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 17 ; nextln: op: LargeStackAlloc ; nextln: info: 1 @@ -136,18 +120,14 @@ block0(v0: i64, v1: i64): ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 22 -; nextln: frame register: 5 -; nextln: frame register offset: 2 -; nextln: unwind codes: 5 +; nextln: frame register: 0 +; nextln: frame register offset: 0 +; nextln: unwind codes: 4 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 6 ; nextln: op: PushNonvolatileRegister ; nextln: info: 15 @@ -160,7 +140,7 @@ block0(v0: i64, v1: i64): ; nextln: offset: 22 ; nextln: op: SaveXmm128 ; nextln: info: 15 -; nextln: value: 0 (u16) +; nextln: value: 10 (u16) ; check a function that has CSRs function %lots_of_registers(i64, i64) windows_fastcall { @@ -214,18 +194,14 @@ block0(v0: i64, v1: i64): ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 35 -; nextln: frame register: 5 -; nextln: frame register offset: 7 -; nextln: unwind codes: 13 +; nextln: frame register: 0 +; nextln: frame register offset: 0 +; nextln: unwind codes: 12 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 5 ; nextln: op: PushNonvolatileRegister ; nextln: info: 3 @@ -261,14 +237,14 @@ block0(v0: i64, v1: i64): ; nextln: offset: 24 ; nextln: op: SaveXmm128 ; nextln: info: 6 -; nextln: value: 2 (u16) +; nextln: value: 3 (u16) ; nextln: ; nextln: offset: 29 ; nextln: op: SaveXmm128 ; nextln: info: 7 -; nextln: value: 1 (u16) +; nextln: value: 2 (u16) ; nextln: ; nextln: offset: 35 ; nextln: op: SaveXmm128 ; nextln: info: 8 -; nextln: value: 0 (u16) +; nextln: value: 1 (u16) From b391817c0fbfc8f05ca8f80f912c1d3a8099bf2f Mon Sep 17 00:00:00 2001 From: Peter Huene Date: Mon, 6 Jul 2020 14:17:42 -0700 Subject: [PATCH 2/4] Add a test case for unwind with saved FPRs on Windows. This commit adds a simple test case that reproduces the problem in --- tests/all/cli_tests.rs | 11 ++++++++ tests/wasm/exit_with_saved_fprs.wat | 40 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/wasm/exit_with_saved_fprs.wat diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index 41a0e155adc3..5c825cd59040 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -350,3 +350,14 @@ fn greeter_preload_callable_command() -> Result<()> { assert_eq!(stdout, "Hello _start\nHello callable greet\nHello done\n"); Ok(()) } + +// Ensure successful WASI exit call with FPR saving frames on stack for Windows x64 +// See https://github.com/bytecodealliance/wasmtime/issues/1967 +#[test] +fn exit_with_saved_fprs() -> Result<()> { + let wasm = build_wasm("tests/wasm/exit_with_saved_fprs.wat")?; + let output = run_wasmtime_for_output(&[wasm.path().to_str().unwrap(), "--disable-cache"])?; + assert_eq!(output.status.code().unwrap(), 0); + assert!(output.stdout.is_empty()); + Ok(()) +} diff --git a/tests/wasm/exit_with_saved_fprs.wat b/tests/wasm/exit_with_saved_fprs.wat new file mode 100644 index 000000000000..6149c6bdf2a9 --- /dev/null +++ b/tests/wasm/exit_with_saved_fprs.wat @@ -0,0 +1,40 @@ +;;; This is a test case for https://github.com/bytecodealliance/wasmtime/issues/1967 +(module + (type (func (param i32))) + + (import "wasi_snapshot_preview1" "proc_exit" (func (type 0))) + + (func $exit (param i32) + local.get 0 + call 0 + unreachable + ) + + (func $do_something (param f64 f64 f64 f64 f64 f64 f64 f64) + i32.const 0 + call $exit + unreachable + ) + + (func $has_saved_fprs (export "_start") + (local f64 f64 f64 f64 f64 f64 f64 f64) + (local.set 0 (f64.const 1)) + (local.set 1 (f64.const 2)) + (local.set 2 (f64.const 3)) + (local.set 3 (f64.const 4)) + (local.set 4 (f64.const 5)) + (local.set 5 (f64.const 6)) + (local.set 6 (f64.const 7)) + (local.set 7 (f64.const 8)) + local.get 0 + local.get 1 + local.get 2 + local.get 3 + local.get 4 + local.get 5 + local.get 6 + local.get 7 + call $do_something + unreachable + ) +) From b1c7c1401ef8948aeb311f23b6bdd8d62fbedc6a Mon Sep 17 00:00:00 2001 From: Peter Huene Date: Mon, 6 Jul 2020 18:52:16 -0700 Subject: [PATCH 3/4] Fix incorrect scaling for SaveXmm128Far. The `SaveXmm128Far` unwind op should have a 32-bit unscaled value. The value was accidentally scaled down by 16 while calculating whether or not the `SaveXmm128` or `SaveXmm128Far` unwind op should be used. --- cranelift/codegen/src/isa/unwind/winx64.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 649b0e7ed05b..6c06a8e67dea 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -80,13 +80,13 @@ impl UnwindCode { stack_offset, } => { writer.write_u8(*offset); - let stack_offset = stack_offset / 16; - if stack_offset <= core::u16::MAX as u32 { + let scaled_stack_offset = stack_offset / 16; + if scaled_stack_offset <= core::u16::MAX as u32 { writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8)); - writer.write_u16::(stack_offset as u16); + writer.write_u16::(scaled_stack_offset as u16); } else { writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8)); - writer.write_u16::(stack_offset as u16); + writer.write_u16::(*stack_offset as u16); writer.write_u16::((stack_offset >> 16) as u16); } } From 92864baa1f004359aa3340bae483424e959f8fa3 Mon Sep 17 00:00:00 2001 From: Peter Huene Date: Mon, 6 Jul 2020 19:02:52 -0700 Subject: [PATCH 4/4] Fix module doc comment. --- cranelift/codegen/src/isa/unwind/winx64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 6c06a8e67dea..02600296ff1d 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -1,4 +1,4 @@ -//! System V ABI unwind information. +//! Windows x64 ABI unwind information. use alloc::vec::Vec; use byteorder::{ByteOrder, LittleEndian};