diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 5a95b22d664b..4f3facd8076a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1635,22 +1635,6 @@ (decl max_fp_value (bool u8 u8) Reg) (extern constructor max_fp_value max_fp_value) -;; Calculate the minimum acceptable floating-point value for a conversion to -;; floating point from an integer type. -;; Accepts whether the output is signed, the size of the input -;; floating point type in bits, and the size of the output integer type -;; in bits. -(decl min_fp_value_sat (bool u8 u8) Reg) -(extern constructor min_fp_value_sat min_fp_value_sat) - -;; Calculate the maximum acceptable floating-point value for a conversion to -;; floating point from an integer type. -;; Accepts whether the output is signed, the size of the input -;; floating point type in bits, and the size of the output integer type -;; in bits. -(decl max_fp_value_sat (bool u8 u8) Reg) -(extern constructor max_fp_value_sat max_fp_value_sat) - ;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane) ;; and the amount to shift by. (decl fpu_op_ri_ushr (u8 u8) FPUOpRI) @@ -3129,32 +3113,37 @@ ;; floating-point value to an integer, saturating if the value ;; does not fit in the target type. ;; Accepts the specific conversion op, the source register, -;; whether the input is signed, and finally the input and output -;; types. 
-(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg) -(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty) - (let ((size ScalarSize (scalar_size in_ty)) - (in_bits u8 (ty_bits in_ty)) - (out_bits u8 (ty_bits out_ty)) - (max Reg (max_fp_value_sat $true in_bits out_bits)) - (tmp Reg (fpu_rrr (FPUOp2.Min) src max size)) - (min Reg (min_fp_value_sat $true in_bits out_bits)) - (tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size)) - (zero Reg (constant_f128 0)) - (tmp ValueRegs (with_flags (fpu_cmp size src src) - (fpu_csel in_ty (Cond.Ne) zero tmp)))) - (fpu_to_int op (value_regs_get tmp 0)))) -(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty) - (let ((size ScalarSize (scalar_size in_ty)) - (in_bits u8 (ty_bits in_ty)) - (out_bits u8 (ty_bits out_ty)) - (max Reg (max_fp_value_sat $false in_bits out_bits)) - (tmp Reg (fpu_rrr (FPUOp2.Min) src max size)) - (min Reg (min_fp_value_sat $false in_bits out_bits)) - (tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size)) - (tmp ValueRegs (with_flags (fpu_cmp size src src) - (fpu_csel in_ty (Cond.Ne) min tmp)))) - (fpu_to_int op (value_regs_get tmp 0)))) +;; whether the input is signed, and finally the output type. 
+(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type) Reg) +(rule (fpu_to_int_cvt_sat op src _ $I64) + (fpu_to_int op src)) +(rule (fpu_to_int_cvt_sat op src _ $I32) + (fpu_to_int op src)) +(rule (fpu_to_int_cvt_sat op src $false (fits_in_16 out_ty)) + (let ((result Reg (fpu_to_int op src)) + (max Reg (imm out_ty (ImmExtend.Zero) -1))) + (with_flags_reg + (cmp (OperandSize.Size32) result max) + (csel (Cond.Hi) max result)))) +(rule (fpu_to_int_cvt_sat op src $true (fits_in_16 out_ty)) + (let ((result Reg (fpu_to_int op src)) + (max Reg (imm $I32 (ImmExtend.Sign) (signed_max out_ty))) + (min Reg (imm $I32 (ImmExtend.Sign) (signed_min out_ty))) + (result Reg (with_flags_reg + (cmp (operand_size out_ty) result max) + (csel (Cond.Gt) max result))) + (result Reg (with_flags_reg + (cmp (operand_size out_ty) result min) + (csel (Cond.Lt) min result)))) + result)) + +(decl signed_min (Type) u64) +(rule (signed_min $I8) -128) +(rule (signed_min $I16) -32768) + +(decl signed_max (Type) u64) +(rule (signed_max $I8) 127) +(rule (signed_max $I16) 32767) (decl fpu_to_int (FpuToIntOp Reg) Reg) (rule (fpu_to_int op src) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 1ef60dd72886..04e9e6ce05d6 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -472,17 +472,17 @@ (rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _))))) (vec_misc (VecMisc2.Fcvtzu) x (vector_size ty))) -(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32)) +(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32)))) + (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty)) (rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64)) + (fpu_to_int_cvt_sat 
(FpuToIntOp.F32ToU64) x $false $I64)) -(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32)) +(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64)))) + (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty)) (rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64)) + (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64)) ;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -492,17 +492,17 @@ (rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _))))) (vec_misc (VecMisc2.Fcvtzs) x (vector_size ty))) -(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32)) +(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32)))) + (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty)) (rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64)) + (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64)) -(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32)) +(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64)))) + (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty)) (rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64)))) - (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64)) + (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64)) ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 075917ae2c42..7e19ddcc4b77 100644 --- 
a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -637,68 +637,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { tmp.to_reg() } - fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg { - let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap(); - - let min: f64 = match (out_bits, signed) { - (32, true) => i32::MIN as f64, - (32, false) => 0.0, - (64, true) => i64::MIN as f64, - (64, false) => 0.0, - _ => unimplemented!( - "unexpected {} output size of {} bits", - if signed { "signed" } else { "unsigned" }, - out_bits - ), - }; - - if in_bits == 32 { - lower_constant_f32(self.lower_ctx, tmp, min as f32) - } else if in_bits == 64 { - lower_constant_f64(self.lower_ctx, tmp, min) - } else { - unimplemented!( - "unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})", - in_bits, - signed, - out_bits - ); - } - - tmp.to_reg() - } - - fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg { - let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap(); - - let max = match (out_bits, signed) { - (32, true) => i32::MAX as f64, - (32, false) => u32::MAX as f64, - (64, true) => i64::MAX as f64, - (64, false) => u64::MAX as f64, - _ => unimplemented!( - "unexpected {} output size of {} bits", - if signed { "signed" } else { "unsigned" }, - out_bits - ), - }; - - if in_bits == 32 { - lower_constant_f32(self.lower_ctx, tmp, max as f32) - } else if in_bits == 64 { - lower_constant_f64(self.lower_ctx, tmp, max) - } else { - unimplemented!( - "unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})", - in_bits, - signed, - out_bits - ); - } - - tmp.to_reg() - } - fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI { if ty_bits == 32 { FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap()) diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif 
b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif index 79c0f397c73d..b15edcd89cfa 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif @@ -225,14 +225,7 @@ block0(v0: f32): } ; block0: -; movz x4, #20352, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movi v17.2s, #0 -; fmax s19, s7, s17 -; fcmp s0, s0 -; fcsel s22, s17, s19, ne -; fcvtzu w0, s22 +; fcvtzu w0, s0 ; ret function %f18(f32) -> i64 { @@ -242,14 +235,7 @@ block0(v0: f32): } ; block0: -; movz x4, #24448, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movi v17.2s, #0 -; fmax s19, s7, s17 -; fcmp s0, s0 -; fcsel s22, s17, s19, ne -; fcvtzu x0, s22 +; fcvtzu x0, s0 ; ret function %f19(f64) -> i32 { @@ -259,13 +245,7 @@ block0(v0: f64): } ; block0: -; ldr d3, pc+8 ; b 12 ; data.f64 4294967295 -; fmin d5, d0, d3 -; movi v7.2s, #0 -; fmax d17, d5, d7 -; fcmp d0, d0 -; fcsel d20, d7, d17, ne -; fcvtzu w0, d20 +; fcvtzu w0, d0 ; ret function %f20(f64) -> i64 { @@ -275,14 +255,7 @@ block0(v0: f64): } ; block0: -; movz x4, #17392, LSL #48 -; fmov d4, x4 -; fmin d7, d0, d4 -; movi v17.2s, #0 -; fmax d19, d7, d17 -; fcmp d0, d0 -; fcsel d22, d17, d19, ne -; fcvtzu x0, d22 +; fcvtzu x0, d0 ; ret function %f21(f32) -> i32 { @@ -371,16 +344,7 @@ block0(v0: f32): } ; block0: -; movz x4, #20224, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movz x10, #52992, LSL #16 -; fmov s18, w10 -; fmax s21, s7, s18 -; movi v23.16b, #0 -; fcmp s0, s0 -; fcsel s26, s23, s21, ne -; fcvtzs w0, s26 +; fcvtzs w0, s0 ; ret function %f26(f32) -> i64 { @@ -390,16 +354,7 @@ block0(v0: f32): } ; block0: -; movz x4, #24320, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movz x10, #57088, LSL #16 -; fmov s18, w10 -; fmax s21, s7, s18 -; movi v23.16b, #0 -; fcmp s0, s0 -; fcsel s26, s23, s21, ne -; fcvtzs x0, s26 +; fcvtzs x0, s0 ; ret function %f27(f64) -> i32 { @@ -409,15 +364,7 @@ block0(v0: f64): } ; block0: -; ldr d3, pc+8 ; b 12 ; data.f64 2147483647 -; fmin d5, d0, d3 -; 
movz x8, #49632, LSL #48 -; fmov d16, x8 -; fmax d19, d5, d16 -; movi v21.16b, #0 -; fcmp d0, d0 -; fcsel d24, d21, d19, ne -; fcvtzs w0, d24 +; fcvtzs w0, d0 ; ret function %f28(f64) -> i64 { @@ -427,16 +374,7 @@ block0(v0: f64): } ; block0: -; movz x4, #17376, LSL #48 -; fmov d4, x4 -; fmin d7, d0, d4 -; movz x10, #50144, LSL #48 -; fmov d18, x10 -; fmax d21, d7, d18 -; movi v23.16b, #0 -; fcmp d0, d0 -; fcsel d26, d23, d21, ne -; fcvtzs x0, d26 +; fcvtzs x0, d0 ; ret function %f29(f32x4) -> i32x4 { @@ -459,3 +397,119 @@ block0(v0: f32x4): ; fcvtzs v0.4s, v0.4s ; ret +function %f31(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} + +; block0: +; fcvtzu w3, s0 +; movz w5, #255 +; subs wzr, w3, w5 +; csel x0, x5, x3, hi +; ret + +function %f32(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} + +; block0: +; fcvtzs w3, s0 +; movz w5, #127 +; movn x7, #127 +; subs wzr, w3, w5 +; csel x10, x5, x3, gt +; subs wzr, w10, w7 +; csel x0, x7, x10, lt +; ret + +function %f33(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} + +; block0: +; fcvtzu w3, s0 +; movz w5, #65535 +; subs wzr, w3, w5 +; csel x0, x5, x3, hi +; ret + +function %f34(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} + +; block0: +; fcvtzs w3, s0 +; movz w5, #32767 +; movn x7, #32767 +; subs wzr, w3, w5 +; csel x10, x5, x3, gt +; subs wzr, w10, w7 +; csel x0, x7, x10, lt +; ret + +function %f35(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} + +; block0: +; fcvtzu w3, d0 +; movz w5, #255 +; subs wzr, w3, w5 +; csel x0, x5, x3, hi +; ret + +function %f36(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} + +; block0: +; fcvtzs w3, d0 +; movz w5, #127 +; movn x7, #127 +; subs wzr, w3, w5 +; csel x10, x5, x3, gt +; subs wzr, w10, w7 +; csel x0, x7, x10, lt +; ret + +function %f37(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i16 v0 + 
return v1 +} + +; block0: +; fcvtzu w3, d0 +; movz w5, #65535 +; subs wzr, w3, w5 +; csel x0, x5, x3, hi +; ret + +function %f38(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} + +; block0: +; fcvtzs w3, d0 +; movz w5, #32767 +; movn x7, #32767 +; subs wzr, w3, w5 +; csel x10, x5, x3, gt +; subs wzr, w10, w7 +; csel x0, x7, x10, lt +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 2ffd58c16e6f..6f04e37d4d21 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -559,149 +559,7 @@ block0(v0: i64): ; scvtf d0, x0 ; ret -function %f49(f32) -> i32 { -block0(v0: f32): - v1 = fcvt_to_uint_sat.i32 v0 - return v1 -} - -; block0: -; movz x4, #20352, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movi v17.2s, #0 -; fmax s19, s7, s17 -; fcmp s0, s0 -; fcsel s22, s17, s19, ne -; fcvtzu w0, s22 -; ret - -function %f50(f32) -> i32 { -block0(v0: f32): - v1 = fcvt_to_sint_sat.i32 v0 - return v1 -} - -; block0: -; movz x4, #20224, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movz x10, #52992, LSL #16 -; fmov s18, w10 -; fmax s21, s7, s18 -; movi v23.16b, #0 -; fcmp s0, s0 -; fcsel s26, s23, s21, ne -; fcvtzs w0, s26 -; ret - -function %f51(f32) -> i64 { -block0(v0: f32): - v1 = fcvt_to_uint_sat.i64 v0 - return v1 -} - -; block0: -; movz x4, #24448, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movi v17.2s, #0 -; fmax s19, s7, s17 -; fcmp s0, s0 -; fcsel s22, s17, s19, ne -; fcvtzu x0, s22 -; ret - -function %f52(f32) -> i64 { -block0(v0: f32): - v1 = fcvt_to_sint_sat.i64 v0 - return v1 -} - -; block0: -; movz x4, #24320, LSL #16 -; fmov s4, w4 -; fmin s7, s0, s4 -; movz x10, #57088, LSL #16 -; fmov s18, w10 -; fmax s21, s7, s18 -; movi v23.16b, #0 -; fcmp s0, s0 -; fcsel s26, s23, s21, ne -; fcvtzs x0, s26 -; ret - -function %f53(f64) -> i32 { -block0(v0: f64): - 
v1 = fcvt_to_uint_sat.i32 v0 - return v1 -} - -; block0: -; ldr d3, pc+8 ; b 12 ; data.f64 4294967295 -; fmin d5, d0, d3 -; movi v7.2s, #0 -; fmax d17, d5, d7 -; fcmp d0, d0 -; fcsel d20, d7, d17, ne -; fcvtzu w0, d20 -; ret - -function %f54(f64) -> i32 { -block0(v0: f64): - v1 = fcvt_to_sint_sat.i32 v0 - return v1 -} - -; block0: -; ldr d3, pc+8 ; b 12 ; data.f64 2147483647 -; fmin d5, d0, d3 -; movz x8, #49632, LSL #48 -; fmov d16, x8 -; fmax d19, d5, d16 -; movi v21.16b, #0 -; fcmp d0, d0 -; fcsel d24, d21, d19, ne -; fcvtzs w0, d24 -; ret - -function %f55(f64) -> i64 { -block0(v0: f64): - v1 = fcvt_to_uint_sat.i64 v0 - return v1 -} - -; block0: -; movz x4, #17392, LSL #48 -; fmov d4, x4 -; fmin d7, d0, d4 -; movi v17.2s, #0 -; fmax d19, d7, d17 -; fcmp d0, d0 -; fcsel d22, d17, d19, ne -; fcvtzu x0, d22 -; ret - -function %f56(f64) -> i64 { -block0(v0: f64): - v1 = fcvt_to_sint_sat.i64 v0 - return v1 -} - -; block0: -; movz x4, #17376, LSL #48 -; fmov d4, x4 -; fmin d7, d0, d4 -; movz x10, #50144, LSL #48 -; fmov d18, x10 -; fmax d21, d7, d18 -; movi v23.16b, #0 -; fcmp d0, d0 -; fcsel d26, d23, d21, ne -; fcvtzs x0, d26 -; ret - -function %f57(f32x2) -> f32x2 { +function %f49(f32x2) -> f32x2 { block0(v0: f32x2): v1 = sqrt v0 return v1 @@ -711,7 +569,7 @@ block0(v0: f32x2): ; fsqrt v0.2s, v0.2s ; ret -function %f58(f32x4) -> f32x4 { +function %f50(f32x4) -> f32x4 { block0(v0: f32x4): v1 = sqrt v0 return v1 @@ -721,7 +579,7 @@ block0(v0: f32x4): ; fsqrt v0.4s, v0.4s ; ret -function %f59(f64x2) -> f64x2 { +function %f51(f64x2) -> f64x2 { block0(v0: f64x2): v1 = sqrt v0 return v1 @@ -731,7 +589,7 @@ block0(v0: f64x2): ; fsqrt v0.2d, v0.2d ; ret -function %f60(f32x2) -> f32x2 { +function %f52(f32x2) -> f32x2 { block0(v0: f32x2): v1 = fneg v0 return v1 @@ -741,7 +599,7 @@ block0(v0: f32x2): ; fneg v0.2s, v0.2s ; ret -function %f61(f32x4) -> f32x4 { +function %f53(f32x4) -> f32x4 { block0(v0: f32x4): v1 = fneg v0 return v1 @@ -751,7 +609,7 @@ block0(v0: f32x4): ; 
fneg v0.4s, v0.4s ; ret -function %f62(f64x2) -> f64x2 { +function %f54(f64x2) -> f64x2 { block0(v0: f64x2): v1 = fneg v0 return v1 @@ -761,7 +619,7 @@ block0(v0: f64x2): ; fneg v0.2d, v0.2d ; ret -function %f63(f32x2) -> f32x2 { +function %f55(f32x2) -> f32x2 { block0(v0: f32x2): v1 = fabs v0 return v1 @@ -771,7 +629,7 @@ block0(v0: f32x2): ; fabs v0.2s, v0.2s ; ret -function %f64(f32x4) -> f32x4 { +function %f56(f32x4) -> f32x4 { block0(v0: f32x4): v1 = fabs v0 return v1 @@ -781,7 +639,7 @@ block0(v0: f32x4): ; fabs v0.4s, v0.4s ; ret -function %f65(f64x2) -> f64x2 { +function %f57(f64x2) -> f64x2 { block0(v0: f64x2): v1 = fabs v0 return v1 @@ -791,7 +649,7 @@ block0(v0: f64x2): ; fabs v0.2d, v0.2d ; ret -function %f66(f32x2) -> f32x2 { +function %f58(f32x2) -> f32x2 { block0(v0: f32x2): v1 = ceil v0 return v1 @@ -801,7 +659,7 @@ block0(v0: f32x2): ; frintp v0.2s, v0.2s ; ret -function %f67(f32x4) -> f32x4 { +function %f59(f32x4) -> f32x4 { block0(v0: f32x4): v1 = ceil v0 return v1 @@ -811,7 +669,7 @@ block0(v0: f32x4): ; frintp v0.4s, v0.4s ; ret -function %f68(f64x2) -> f64x2 { +function %f60(f64x2) -> f64x2 { block0(v0: f64x2): v1 = ceil v0 return v1 @@ -821,7 +679,7 @@ block0(v0: f64x2): ; frintp v0.2d, v0.2d ; ret -function %f69(f32x2) -> f32x2 { +function %f61(f32x2) -> f32x2 { block0(v0: f32x2): v1 = floor v0 return v1 @@ -831,7 +689,7 @@ block0(v0: f32x2): ; frintm v0.2s, v0.2s ; ret -function %f70(f32x4) -> f32x4 { +function %f62(f32x4) -> f32x4 { block0(v0: f32x4): v1 = floor v0 return v1 @@ -841,7 +699,7 @@ block0(v0: f32x4): ; frintm v0.4s, v0.4s ; ret -function %f71(f64x2) -> f64x2 { +function %f63(f64x2) -> f64x2 { block0(v0: f64x2): v1 = floor v0 return v1 @@ -851,7 +709,7 @@ block0(v0: f64x2): ; frintm v0.2d, v0.2d ; ret -function %f72(f32x2) -> f32x2 { +function %f64(f32x2) -> f32x2 { block0(v0: f32x2): v1 = trunc v0 return v1 @@ -861,7 +719,7 @@ block0(v0: f32x2): ; frintz v0.2s, v0.2s ; ret -function %f73(f32x4) -> f32x4 { +function %f65(f32x4) 
-> f32x4 { block0(v0: f32x4): v1 = trunc v0 return v1 @@ -871,7 +729,7 @@ block0(v0: f32x4): ; frintz v0.4s, v0.4s ; ret -function %f74(f64x2) -> f64x2 { +function %f66(f64x2) -> f64x2 { block0(v0: f64x2): v1 = trunc v0 return v1 @@ -881,7 +739,7 @@ block0(v0: f64x2): ; frintz v0.2d, v0.2d ; ret -function %f75(f32x2) -> f32x2 { +function %f67(f32x2) -> f32x2 { block0(v0: f32x2): v1 = nearest v0 return v1 @@ -891,7 +749,7 @@ block0(v0: f32x2): ; frintn v0.2s, v0.2s ; ret -function %f76(f32x4) -> f32x4 { +function %f68(f32x4) -> f32x4 { block0(v0: f32x4): v1 = nearest v0 return v1 @@ -901,7 +759,7 @@ block0(v0: f32x4): ; frintn v0.4s, v0.4s ; ret -function %f77(f64x2) -> f64x2 { +function %f69(f64x2) -> f64x2 { block0(v0: f64x2): v1 = nearest v0 return v1 @@ -911,7 +769,7 @@ block0(v0: f64x2): ; frintn v0.2d, v0.2d ; ret -function %f78(f32x4, f32x4, f32x4) -> f32x4 { +function %f70(f32x4, f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4, v2: f32x4): v3 = fma v0, v1, v2 return v3 @@ -922,7 +780,7 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4): ; mov v0.16b, v2.16b ; ret -function %f79(f32x2, f32x2, f32x2) -> f32x2 { +function %f71(f32x2, f32x2, f32x2) -> f32x2 { block0(v0: f32x2, v1: f32x2, v2: f32x2): v3 = fma v0, v1, v2 return v3 @@ -933,7 +791,7 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2): ; mov v0.16b, v2.16b ; ret -function %f80(f64x2, f64x2, f64x2) -> f64x2 { +function %f72(f64x2, f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2, v2: f64x2): v3 = fma v0, v1, v2 return v3 @@ -944,7 +802,7 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): ; mov v0.16b, v2.16b ; ret -function %f81(f32x2, f32x2) -> f32x2 { +function %f73(f32x2, f32x2) -> f32x2 { block0(v0: f32x2, v1: f32x2): v2 = fcopysign v0, v1 return v2 @@ -955,7 +813,7 @@ block0(v0: f32x2, v1: f32x2): ; sli v0.2s, v0.2s, v5.2s, #31 ; ret -function %f82(f32x4, f32x4) -> f32x4 { +function %f74(f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4): v2 = fcopysign v0, v1 return v2 @@ -966,7 +824,7 @@ block0(v0: f32x4, 
v1: f32x4): ; sli v0.4s, v0.4s, v5.4s, #31 ; ret -function %f83(f64x2, f64x2) -> f64x2 { +function %f75(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): v2 = fcopysign v0, v1 return v2 @@ -976,4 +834,3 @@ block0(v0: f64x2, v1: f64x2): ; ushr v5.2d, v1.2d, #63 ; sli v0.2d, v0.2d, v5.2d, #63 ; ret - diff --git a/cranelift/filetests/filetests/runtests/fcvt-sat-small.clif b/cranelift/filetests/filetests/runtests/fcvt-sat-small.clif new file mode 100644 index 000000000000..977336e90318 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fcvt-sat-small.clif @@ -0,0 +1,132 @@ +test run +target aarch64 +target s390x +; x86_64 does not support `fcvt_to_{u,s}int_sat` to integers < 32 bits. + +function %fcvt_to_sint_sat_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} +; run: %fcvt_to_sint_sat_i8(0x0.0) == 0 +; run: %fcvt_to_sint_sat_i8(0x1.0) == 1 +; run: %fcvt_to_sint_sat_i8(0x1.d6f346p26) == 127 +; run: %fcvt_to_sint_sat_i8(0x8.1) == 8 +; run: %fcvt_to_sint_sat_i8(-0x1.0) == -1 +; run: %fcvt_to_sint_sat_i8(0xB2D05E00.0) == 127 +; run: %fcvt_to_sint_sat_i8(-0xB2D05E00.0) == -128 +; run: %fcvt_to_sint_sat_i8(0x1.fffffep127) == 127 +; run: %fcvt_to_sint_sat_i8(-0x1.fffffep127) == -128 +; run: %fcvt_to_sint_sat_i8(NaN) == 0 + +function %fcvt_to_uint_sat_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} +; run: %fcvt_to_uint_sat_i8(0x0.0) == 0 +; run: %fcvt_to_uint_sat_i8(0x1.0) == 1 +; run: %fcvt_to_uint_sat_i8(0x1.d6f346p26) == 255 +; run: %fcvt_to_uint_sat_i8(0x8.1) == 8 +; run: %fcvt_to_uint_sat_i8(-0x1.0) == 0 +; run: %fcvt_to_uint_sat_i8(0xB2D05E00.0) == 255 +; run: %fcvt_to_uint_sat_i8(-0xB2D05E00.0) == 0 +; run: %fcvt_to_uint_sat_i8(0x1.fffffep127) == 255 +; run: %fcvt_to_uint_sat_i8(-0x1.fffffep127) == 0 +; run: %fcvt_to_uint_sat_i8(NaN) == 0 + +function %fcvt_to_sint_sat_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} +; run: %fcvt_to_sint_sat_i16(0x0.0) == 0 +; 
run: %fcvt_to_sint_sat_i16(0x1.0) == 1 +; run: %fcvt_to_sint_sat_i16(0x1.d6f346p26) == 32767 +; run: %fcvt_to_sint_sat_i16(0x8.1) == 8 +; run: %fcvt_to_sint_sat_i16(-0x1.0) == -1 +; run: %fcvt_to_sint_sat_i16(0xB2D05E00.0) == 32767 +; run: %fcvt_to_sint_sat_i16(-0xB2D05E00.0) == -32768 +; run: %fcvt_to_sint_sat_i16(0x1.fffffep127) == 32767 +; run: %fcvt_to_sint_sat_i16(-0x1.fffffep127) == -32768 +; run: %fcvt_to_sint_sat_i16(NaN) == 0 + +function %fcvt_to_uint_sat_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} +; run: %fcvt_to_uint_sat_i16(0x0.0) == 0 +; run: %fcvt_to_uint_sat_i16(0x1.0) == 1 +; run: %fcvt_to_uint_sat_i16(0x1.d6f346p26) == 65535 +; run: %fcvt_to_uint_sat_i16(0x8.1) == 8 +; run: %fcvt_to_uint_sat_i16(-0x1.0) == 0 +; run: %fcvt_to_uint_sat_i16(0xB2D05E00.0) == 65535 +; run: %fcvt_to_uint_sat_i16(-0xB2D05E00.0) == 0 +; run: %fcvt_to_uint_sat_i16(0x1.fffffep127) == 65535 +; run: %fcvt_to_uint_sat_i16(-0x1.fffffep127) == 0 +; run: %fcvt_to_uint_sat_i16(NaN) == 0 + +function %fcvt_to_sint_sat_i8_f64(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} +; run: %fcvt_to_sint_sat_i8_f64(0x0.0) == 0 +; run: %fcvt_to_sint_sat_i8_f64(0x1.0) == 1 +; run: %fcvt_to_sint_sat_i8_f64(0x1.d6f346p26) == 127 +; run: %fcvt_to_sint_sat_i8_f64(0x8.1) == 8 +; run: %fcvt_to_sint_sat_i8_f64(-0x1.0) == -1 +; run: %fcvt_to_sint_sat_i8_f64(0xB2D05E00.0) == 127 +; run: %fcvt_to_sint_sat_i8_f64(-0xB2D05E00.0) == -128 +; run: %fcvt_to_sint_sat_i8_f64(0x1.fffffffffffffp1023) == 127 +; run: %fcvt_to_sint_sat_i8_f64(-0x1.fffffffffffffp1023) == -128 +; run: %fcvt_to_sint_sat_i8_f64(NaN) == 0 + +function %fcvt_to_uint_sat_i8_f64(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} +; run: %fcvt_to_uint_sat_i8_f64(0x0.0) == 0 +; run: %fcvt_to_uint_sat_i8_f64(0x1.0) == 1 +; run: %fcvt_to_uint_sat_i8_f64(0x1.d6f346p26) == 255 +; run: %fcvt_to_uint_sat_i8_f64(0x8.1) == 8 +; run: 
%fcvt_to_uint_sat_i8_f64(-0x1.0) == 0 +; run: %fcvt_to_uint_sat_i8_f64(0xB2D05E00.0) == 255 +; run: %fcvt_to_uint_sat_i8_f64(-0xB2D05E00.0) == 0 +; run: %fcvt_to_uint_sat_i8_f64(0x1.fffffffffffffp1023) == 255 +; run: %fcvt_to_uint_sat_i8_f64(-0x1.fffffffffffffp1023) == 0 +; run: %fcvt_to_uint_sat_i8_f64(NaN) == 0 + +function %fcvt_to_sint_sat_i16_f64(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} +; run: %fcvt_to_sint_sat_i16_f64(0x0.0) == 0 +; run: %fcvt_to_sint_sat_i16_f64(0x1.0) == 1 +; run: %fcvt_to_sint_sat_i16_f64(0x1.d6f346p26) == 32767 +; run: %fcvt_to_sint_sat_i16_f64(0x8.1) == 8 +; run: %fcvt_to_sint_sat_i16_f64(-0x1.0) == -1 +; run: %fcvt_to_sint_sat_i16_f64(0xB2D05E00.0) == 32767 +; run: %fcvt_to_sint_sat_i16_f64(-0xB2D05E00.0) == -32768 +; run: %fcvt_to_sint_sat_i16_f64(0x1.fffffffffffffp1023) == 32767 +; run: %fcvt_to_sint_sat_i16_f64(-0x1.fffffffffffffp1023) == -32768 +; run: %fcvt_to_sint_sat_i16_f64(NaN) == 0 + +function %fcvt_to_uint_sat_i16_f64(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} +; run: %fcvt_to_uint_sat_i16_f64(0x0.0) == 0 +; run: %fcvt_to_uint_sat_i16_f64(0x1.0) == 1 +; run: %fcvt_to_uint_sat_i16_f64(0x1.d6f346p26) == 65535 +; run: %fcvt_to_uint_sat_i16_f64(0x8.1) == 8 +; run: %fcvt_to_uint_sat_i16_f64(-0x1.0) == 0 +; run: %fcvt_to_uint_sat_i16_f64(0xB2D05E00.0) == 65535 +; run: %fcvt_to_uint_sat_i16_f64(-0xB2D05E00.0) == 0 +; run: %fcvt_to_uint_sat_i16_f64(0x1.fffffffffffffp1023) == 65535 +; run: %fcvt_to_uint_sat_i16_f64(-0x1.fffffffffffffp1023) == 0 +; run: %fcvt_to_uint_sat_i16_f64(NaN) == 0