diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 9219d293e2bc..02600296ff1d 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -1,4 +1,4 @@ -//! System V ABI unwind information. +//! Windows x64 ABI unwind information. use alloc::vec::Vec; use byteorder::{ByteOrder, LittleEndian}; @@ -57,10 +57,6 @@ pub(crate) enum UnwindCode { offset: u8, size: u32, }, - SetFramePointer { - offset: u8, - sp_offset: u8, - }, } impl UnwindCode { @@ -69,7 +65,6 @@ impl UnwindCode { PushNonvolatileRegister = 0, LargeStackAlloc = 1, SmallStackAlloc = 2, - SetFramePointer = 3, SaveXmm128 = 8, SaveXmm128Far = 9, } @@ -85,13 +80,13 @@ impl UnwindCode { stack_offset, } => { writer.write_u8(*offset); - let stack_offset = stack_offset / 16; - if stack_offset <= core::u16::MAX as u32 { + let scaled_stack_offset = stack_offset / 16; + if scaled_stack_offset <= core::u16::MAX as u32 { writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8)); - writer.write_u16::(stack_offset as u16); + writer.write_u16::(scaled_stack_offset as u16); } else { writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8)); - writer.write_u16::(stack_offset as u16); + writer.write_u16::(*stack_offset as u16); writer.write_u16::((stack_offset >> 16) as u16); } } @@ -113,10 +108,6 @@ impl UnwindCode { writer.write_u32::(*size); } } - Self::SetFramePointer { offset, sp_offset } => { - writer.write_u8(*offset); - writer.write_u8((*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8)); - } }; } diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index 02358ba89bf0..6cd1175cd637 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -1071,8 +1071,7 @@ pub fn create_unwind_info( .map(|u| UnwindInfo::SystemV(u)) } CallConv::WindowsFastcall => { - super::unwind::winx64::create_unwind_info(func, isa, Some(RU::rbp.into()))? - .map(|u| UnwindInfo::WindowsX64(u)) + super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u)) } _ => None, }) diff --git a/cranelift/codegen/src/isa/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/x86/unwind/winx64.rs index de35fc696402..79afc27643a0 100644 --- a/cranelift/codegen/src/isa/x86/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/x86/unwind/winx64.rs @@ -13,7 +13,6 @@ use log::warn; pub(crate) fn create_unwind_info( func: &Function, isa: &dyn TargetIsa, - frame_register: Option, ) -> CodegenResult> { // Only Windows fastcall is supported for unwind information if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() { @@ -28,7 +27,6 @@ pub(crate) fn create_unwind_info( let mut prologue_size = 0; let mut unwind_codes = Vec::new(); let mut found_end = false; - let mut xmm_save_count: u8 = 0; for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) { // x64 ABI prologues cannot exceed 255 bytes in length @@ -64,16 +62,6 @@ pub(crate) fn create_unwind_info( _ => {} } } - InstructionData::CopySpecial { src, dst, .. } => { - if let Some(frame_register) = frame_register { - if src == (RU::rsp as RegUnit) && dst == frame_register { - unwind_codes.push(UnwindCode::SetFramePointer { - offset: unwind_offset, - sp_offset: 0, - }); - } - } - } InstructionData::UnaryImm { opcode, imm } => { match opcode { Opcode::Iconst => { @@ -112,7 +100,6 @@ pub(crate) fn create_unwind_info( { // If this is a save of an FPR, record an unwind operation // Note: the stack_offset here is relative to an adjusted SP - // This will be fixed up later to be based on the frame pointer offset if dst == (RU::rsp as RegUnit) && FPR.contains(src) { let offset: i32 = offset.into(); unwind_codes.push(UnwindCode::SaveXmm { @@ -120,8 +107,6 @@ pub(crate) fn create_unwind_info( reg: src as u8, stack_offset: offset as u32, }); - - xmm_save_count += 1; } } } @@ -136,45 +121,11 @@ pub(crate) fn create_unwind_info( assert!(found_end); - // When using a frame register, certain unwind operations, such as XMM saves, are relative to the frame - // register minus some offset, forming a "base address". This attempts to calculate the frame register offset - // while updating the XMM save offsets to be relative from this "base address" rather than RSP. - let mut frame_register_offset = 0; - if frame_register.is_some() && xmm_save_count > 0 { - // Determine the number of 16-byte slots used for all CSRs (including GPRs) - // The "frame register offset" will point at the last slot used (i.e. the last saved FPR) - // Assumption: each FPR is stored at a lower address than the previous one - let mut last_stack_offset = None; - let mut fpr_save_count: u8 = 0; - let mut gpr_push_count: u8 = 0; - for code in unwind_codes.iter_mut() { - match code { - UnwindCode::SaveXmm { stack_offset, .. } => { - if let Some(last) = last_stack_offset { - assert!(last > *stack_offset); - } - last_stack_offset = Some(*stack_offset); - fpr_save_count += 1; - *stack_offset = (xmm_save_count - fpr_save_count) as u32 * 16; - } - UnwindCode::PushRegister { .. } => { - gpr_push_count += 1; - } - _ => {} - } - } - assert_eq!(fpr_save_count, xmm_save_count); - - // Account for alignment space when there's an odd number of GPR pushes - // Assumption: an FPR (16 bytes) is twice the size of a GPR (8 bytes), hence the (rounded-up) integer division - frame_register_offset = fpr_save_count + ((gpr_push_count + 1) / 2); - } - Ok(Some(UnwindInfo { flags: 0, // this assumes cranelift functions have no SEH handlers prologue_size: prologue_size as u8, - frame_register: frame_register.map(|r| GPR.index_of(r) as u8), - frame_register_offset, + frame_register: None, + frame_register_offset: 0, unwind_codes, })) } @@ -201,7 +152,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); assert_eq!( - create_unwind_info(&context.func, &*isa, None).expect("can create unwind info"), + create_unwind_info(&context.func, &*isa).expect("can create unwind info"), None ); } @@ -219,7 +170,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into())) + let unwind = create_unwind_info(&context.func, &*isa) .expect("can create unwind info") .expect("expected unwind info"); @@ -228,17 +179,13 @@ mod tests { UnwindInfo { flags: 0, prologue_size: 9, - frame_register: Some(GPR.index_of(RU::rbp.into()) as u8), + frame_register: None, frame_register_offset: 0, unwind_codes: vec![ UnwindCode::PushRegister { offset: 2, reg: GPR.index_of(RU::rbp.into()) as u8 }, - UnwindCode::SetFramePointer { - offset: 5, - sp_offset: 0 - }, UnwindCode::StackAlloc { offset: 9, size: 64 @@ -247,9 +194,9 @@ mod tests { } ); - assert_eq!(unwind.emit_size(), 12); + assert_eq!(unwind.emit_size(), 8); - let mut buf = [0u8; 12]; + let mut buf = [0u8; 8]; unwind.emit(&mut buf); assert_eq!( @@ -257,16 +204,12 @@ mod tests { [ 0x01, // Version and flags (version 1, no flags) 0x09, // Prologue size - 0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg) - 0x05, // Frame register + offset (RBP with 0 offset) + 0x02, // Unwind code count (1 for stack alloc, 1 for push reg) + 0x00, // Frame register + offset (no frame register) 0x09, // Prolog offset 0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes) - 0x05, // Prolog offset - 0x03, // Operation 3 (save frame register), stack pointer offset = 0 0x02, // Prolog offset 0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP) - 0x00, // Padding byte - 0x00, // Padding byte ] ); } @@ -284,7 +227,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into())) + let unwind = create_unwind_info(&context.func, &*isa) .expect("can create unwind info") .expect("expected unwind info"); @@ -293,17 +236,13 @@ mod tests { UnwindInfo { flags: 0, prologue_size: 27, - frame_register: Some(GPR.index_of(RU::rbp.into()) as u8), + frame_register: None, frame_register_offset: 0, unwind_codes: vec![ UnwindCode::PushRegister { offset: 2, reg: GPR.index_of(RU::rbp.into()) as u8 }, - UnwindCode::SetFramePointer { - offset: 5, - sp_offset: 0 - }, UnwindCode::StackAlloc { offset: 27, size: 10000 @@ -322,16 +261,16 @@ mod tests { [ 0x01, // Version and flags (version 1, no flags) 0x1B, // Prologue size - 0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg) - 0x05, // Frame register + offset (RBP with 0 offset) + 0x03, // Unwind code count (2 for stack alloc, 1 for push reg) + 0x00, // Frame register + offset (no frame register) 0x1B, // Prolog offset 0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0) 0xE2, // Low size byte 0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes) - 0x05, // Prolog offset - 0x03, // Operation 3 (save frame register), stack pointer offset = 0 0x02, // Prolog offset 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) + 0x00, // Padding + 0x00, // Padding ] ); } @@ -349,7 +288,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - let unwind = create_unwind_info(&context.func, &*isa, Some(RU::rbp.into())) + let unwind = create_unwind_info(&context.func, &*isa) .expect("can create unwind info") .expect("expected unwind info"); @@ -358,17 +297,13 @@ mod tests { UnwindInfo { flags: 0, prologue_size: 27, - frame_register: Some(GPR.index_of(RU::rbp.into()) as u8), + frame_register: None, frame_register_offset: 0, unwind_codes: vec![ UnwindCode::PushRegister { offset: 2, reg: GPR.index_of(RU::rbp.into()) as u8 }, - UnwindCode::SetFramePointer { - offset: 5, - sp_offset: 0 - }, UnwindCode::StackAlloc { offset: 27, size: 1000000 @@ -377,9 +312,9 @@ mod tests { } ); - assert_eq!(unwind.emit_size(), 16); + assert_eq!(unwind.emit_size(), 12); - let mut buf = [0u8; 16]; + let mut buf = [0u8; 12]; unwind.emit(&mut buf); assert_eq!( @@ -387,20 +322,16 @@ mod tests { [ 0x01, // Version and flags (version 1, no flags) 0x1B, // Prologue size - 0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg) - 0x05, // Frame register + offset (RBP with 0 offset) + 0x04, // Unwind code count (3 for stack alloc, 1 for push reg) + 0x00, // Frame register + offset (no frame register) 0x1B, // Prolog offset 0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1) 0x40, // Byte 1 of size 0x42, // Byte 2 of size 0x0F, // Byte 3 of size 0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes) - 0x05, // Prolog offset - 0x03, // Operation 3 (save frame register), stack pointer offset = 0 0x02, // Prolog offset 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - 0x00, // Padding byte - 0x00, // Padding byte ] ); } diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif index 4e5d4f18f3ea..fcfe12b80b2c 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif @@ -11,17 +11,13 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 4 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 2 +; nextln: unwind codes: 1 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 -; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 ; check the unwind information with a non-leaf function with no args function %no_args() windows_fastcall { @@ -33,18 +29,14 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 8 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 3 +; nextln: unwind codes: 2 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 8 ; nextln: op: SmallStackAlloc ; nextln: info: 3 @@ -58,18 +50,14 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 17 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 3 +; nextln: unwind codes: 2 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 -; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: +; nextln: ; nextln: offset: 17 ; nextln: op: LargeStackAlloc ; nextln: info: 0 @@ -84,18 +72,14 @@ block0: ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 17 -; nextln: frame register: 5 +; nextln: frame register: 0 ; nextln: frame register offset: 0 -; nextln: unwind codes: 3 +; nextln: unwind codes: 2 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 17 ; nextln: op: LargeStackAlloc ; nextln: info: 1 @@ -136,18 +120,14 @@ block0(v0: i64, v1: i64): ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 22 -; nextln: frame register: 5 -; nextln: frame register offset: 2 -; nextln: unwind codes: 5 +; nextln: frame register: 0 +; nextln: frame register offset: 0 +; nextln: unwind codes: 4 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 6 ; nextln: op: PushNonvolatileRegister ; nextln: info: 15 @@ -160,7 +140,7 @@ block0(v0: i64, v1: i64): ; nextln: offset: 22 ; nextln: op: SaveXmm128 ; nextln: info: 15 -; nextln: value: 0 (u16) +; nextln: value: 10 (u16) ; check a function that has CSRs function %lots_of_registers(i64, i64) windows_fastcall { @@ -214,18 +194,14 @@ block0(v0: i64, v1: i64): ; sameln: version: 1 ; nextln: flags: 0 ; nextln: prologue size: 35 -; nextln: frame register: 5 -; nextln: frame register offset: 7 -; nextln: unwind codes: 13 +; nextln: frame register: 0 +; nextln: frame register offset: 0 +; nextln: unwind codes: 12 ; nextln: ; nextln: offset: 1 ; nextln: op: PushNonvolatileRegister ; nextln: info: 5 ; nextln: -; nextln: offset: 4 -; nextln: op: SetFramePointer -; nextln: info: 0 -; nextln: ; nextln: offset: 5 ; nextln: op: PushNonvolatileRegister ; nextln: info: 3 @@ -261,14 +237,14 @@ block0(v0: i64, v1: i64): ; nextln: offset: 24 ; nextln: op: SaveXmm128 ; nextln: info: 6 -; nextln: value: 2 (u16) +; nextln: value: 3 (u16) ; nextln: ; nextln: offset: 29 ; nextln: op: SaveXmm128 ; nextln: info: 7 -; nextln: value: 1 (u16) +; nextln: value: 2 (u16) ; nextln: ; nextln: offset: 35 ; nextln: op: SaveXmm128 ; nextln: info: 8 -; nextln: value: 0 (u16) +; nextln: value: 1 (u16) diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index 41a0e155adc3..5c825cd59040 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -350,3 +350,14 @@ fn greeter_preload_callable_command() -> Result<()> { assert_eq!(stdout, "Hello _start\nHello callable greet\nHello done\n"); Ok(()) } + +// Ensure successful WASI exit call with FPR saving frames on stack for Windows x64 +// See https://github.com/bytecodealliance/wasmtime/issues/1967 +#[test] +fn exit_with_saved_fprs() -> Result<()> { + let wasm = build_wasm("tests/wasm/exit_with_saved_fprs.wat")?; + let output = run_wasmtime_for_output(&[wasm.path().to_str().unwrap(), "--disable-cache"])?; + assert_eq!(output.status.code().unwrap(), 0); + assert!(output.stdout.is_empty()); + Ok(()) +} diff --git a/tests/wasm/exit_with_saved_fprs.wat b/tests/wasm/exit_with_saved_fprs.wat new file mode 100644 index 000000000000..6149c6bdf2a9 --- /dev/null +++ b/tests/wasm/exit_with_saved_fprs.wat @@ -0,0 +1,40 @@ +;;; This is a test case for https://github.com/bytecodealliance/wasmtime/issues/1967 +(module + (type (func (param i32))) + + (import "wasi_snapshot_preview1" "proc_exit" (func (type 0))) + + (func $exit (param i32) + local.get 0 + call 0 + unreachable + ) + + (func $do_something (param f64 f64 f64 f64 f64 f64 f64 f64) + i32.const 0 + call $exit + unreachable + ) + + (func $has_saved_fprs (export "_start") + (local f64 f64 f64 f64 f64 f64 f64 f64) + (local.set 0 (f64.const 1)) + (local.set 1 (f64.const 2)) + (local.set 2 (f64.const 3)) + (local.set 3 (f64.const 4)) + (local.set 4 (f64.const 5)) + (local.set 5 (f64.const 6)) + (local.set 6 (f64.const 7)) + (local.set 7 (f64.const 8)) + local.get 0 + local.get 1 + local.get 2 + local.get 3 + local.get 4 + local.get 5 + local.get 6 + local.get 7 + call $do_something + unreachable + ) +)