Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve fcvt_to_{u,s}int_sat lowering (AArch64) #4913

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 31 additions & 42 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1635,22 +1635,6 @@
(decl max_fp_value (bool u8 u8) Reg)
(extern constructor max_fp_value max_fp_value)

;; Calculate the minimum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl min_fp_value_sat (bool u8 u8) Reg)
(extern constructor min_fp_value_sat min_fp_value_sat)

;; Calculate the maximum acceptable floating-point value for a saturating
;; conversion from a floating-point type to an integer type.
;; Accepts whether the output is signed, the size of the input
;; floating point type in bits, and the size of the output integer type
;; in bits.
(decl max_fp_value_sat (bool u8 u8) Reg)
(extern constructor max_fp_value_sat max_fp_value_sat)

;; Constructs an FPUOpRI.Ushr* given the size in bits of the value (or lane)
;; and the amount to shift by.
(decl fpu_op_ri_ushr (u8 u8) FPUOpRI)
Expand Down Expand Up @@ -3129,32 +3113,37 @@
;; floating-point value to an integer, saturating if the value
;; does not fit in the target type.
;; Accepts the specific conversion op, the source register,
;; whether the integer result is signed, and finally the input and output
;; types.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type Type) Reg)
;; Signed result ($true): clamp `src` between the bounds supplied by
;; `min_fp_value_sat` / `max_fp_value_sat`, then convert.
(rule (fpu_to_int_cvt_sat op src $true in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
;; Upper clamp: tmp = FPUOp2.Min of `src` and the maximum bound.
(max Reg (max_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
;; Lower clamp: tmp = FPUOp2.Max of the previous result and the minimum bound.
(min Reg (min_fp_value_sat $true in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
(zero Reg (constant_f128 0))
;; `src` is compared with itself; when the comparison reports Cond.Ne the
;; `zero` constant is selected instead of the clamped value.
;; NOTE(review): a self-comparison is presumably "not equal" only for NaN
;; inputs -- confirm against the AArch64 FCMP flag semantics.
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) zero tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
;; Unsigned result ($false): same clamping sequence as the signed rule, but
;; the Cond.Ne case selects the `min` bound register rather than a separately
;; materialized zero constant.
(rule (fpu_to_int_cvt_sat op src $false in_ty out_ty)
(let ((size ScalarSize (scalar_size in_ty))
(in_bits u8 (ty_bits in_ty))
(out_bits u8 (ty_bits out_ty))
;; Upper clamp: tmp = FPUOp2.Min of `src` and the maximum bound.
(max Reg (max_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Min) src max size))
;; Lower clamp: tmp = FPUOp2.Max of the previous result and the minimum bound.
(min Reg (min_fp_value_sat $false in_bits out_bits))
(tmp Reg (fpu_rrr (FPUOp2.Max) tmp min size))
;; Self-comparison of `src`; on Cond.Ne the result is replaced by `min`.
(tmp ValueRegs (with_flags (fpu_cmp size src src)
(fpu_csel in_ty (Cond.Ne) min tmp))))
(fpu_to_int op (value_regs_get tmp 0))))
;; whether the integer result is signed, and finally the output type.
(decl fpu_to_int_cvt_sat (FpuToIntOp Reg bool Type) Reg)
;; 64-bit and 32-bit results: the conversion is emitted directly, with no
;; additional clamping, regardless of signedness.
;; NOTE(review): this presumably relies on the conversion instruction itself
;; saturating -- confirm against the FCVTZS/FCVTZU descriptions.
(rule (fpu_to_int_cvt_sat op src _ $I64)
(fpu_to_int op src))
(rule (fpu_to_int_cvt_sat op src _ $I32)
(fpu_to_int op src))
;; Unsigned results of at most 16 bits: convert first, then clamp the integer
;; result from above.
(rule (fpu_to_int_cvt_sat op src $false (fits_in_16 out_ty))
(let ((result Reg (fpu_to_int op src))
;; Upper bound built from -1 in `out_ty` with ImmExtend.Zero.
(max Reg (imm out_ty (ImmExtend.Zero) -1)))
;; 32-bit compare; Cond.Hi selects `max` when `result` is above it,
;; otherwise `result` is kept.
(with_flags_reg
(cmp (OperandSize.Size32) result max)
(csel (Cond.Hi) max result))))
;; Signed results of at most 16 bits: convert first, then clamp the integer
;; result to [signed_min, signed_max] of `out_ty`.
(rule (fpu_to_int_cvt_sat op src $true (fits_in_16 out_ty))
(let ((result Reg (fpu_to_int op src))
;; Both bounds are materialized as $I32 immediates with ImmExtend.Sign.
(max Reg (imm $I32 (ImmExtend.Sign) (signed_max out_ty)))
(min Reg (imm $I32 (ImmExtend.Sign) (signed_min out_ty)))
;; Upper clamp: Cond.Gt selects `max` when `result` exceeds it.
(result Reg (with_flags_reg
(cmp (operand_size out_ty) result max)
(csel (Cond.Gt) max result)))
;; Lower clamp: Cond.Lt selects `min` when `result` is below it.
(result Reg (with_flags_reg
(cmp (operand_size out_ty) result min)
(csel (Cond.Lt) min result))))
result))

;; Smallest value of a signed integer of the given type. Rules exist only for
;; $I8 and $I16.
;; NOTE(review): the return type is u64 while the literals are negative;
;; presumably they are carried as two's-complement bit patterns -- confirm
;; against ISLE's integer literal handling.
(decl signed_min (Type) u64)
(rule (signed_min $I8) -128)
(rule (signed_min $I16) -32768)

;; Largest value of a signed integer of the given type. Rules exist only for
;; $I8 and $I16.
(decl signed_max (Type) u64)
(rule (signed_max $I8) 127)
(rule (signed_max $I16) 32767)

(decl fpu_to_int (FpuToIntOp Reg) Reg)
(rule (fpu_to_int op src)
Expand Down
24 changes: 12 additions & 12 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -472,17 +472,17 @@
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzu) x (vector_size ty)))

(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false $F32 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty))

(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $F32 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $I64))

(rule (lower (has_type $I32 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false $F64 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty))

(rule (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $F64 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64))

;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -492,17 +492,17 @@
(rule (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _)))))
(vec_misc (VecMisc2.Fcvtzs) x (vector_size ty)))

(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true $F32 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty))

(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32))))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $F32 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64))

(rule (lower (has_type $I32 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true $F64 $I32))
(rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty))

(rule (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64))))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $F64 $I64))
(fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64))

;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
62 changes: 0 additions & 62 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,68 +637,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
tmp.to_reg()
}

/// Produces a register holding the lower bound used when saturating a
/// float-to-integer conversion.
///
/// `signed` and `out_bits` describe the integer result (32 or 64 bits);
/// `in_bits` is the width of the floating-point input (32 or 64 bits) and
/// decides whether the bound is lowered as an `f32` or an `f64` constant.
///
/// # Panics
///
/// Hits `unimplemented!` for any other `out_bits` or `in_bits` value.
fn min_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
    let dst = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

    // Unsigned results bottom out at zero; signed ones at the type's minimum.
    let lower_bound: f64 = match (out_bits, signed) {
        (32, false) | (64, false) => 0.0,
        (32, true) => i32::MIN as f64,
        (64, true) => i64::MIN as f64,
        _ => unimplemented!(
            "unexpected {} output size of {} bits",
            if signed { "signed" } else { "unsigned" },
            out_bits
        ),
    };

    // The constant is lowered at the precision of the floating-point input.
    match in_bits {
        32 => lower_constant_f32(self.lower_ctx, dst, lower_bound as f32),
        64 => lower_constant_f64(self.lower_ctx, dst, lower_bound),
        _ => unimplemented!(
            "unexpected input size for min_fp_value_sat: {} (signed: {}, output size: {})",
            in_bits,
            signed,
            out_bits
        ),
    }

    dst.to_reg()
}

/// Produces a register holding the upper bound used when saturating a
/// float-to-integer conversion.
///
/// `signed` and `out_bits` describe the integer result (32 or 64 bits);
/// `in_bits` is the width of the floating-point input (32 or 64 bits) and
/// decides whether the bound is lowered as an `f32` or an `f64` constant.
///
/// # Panics
///
/// Hits `unimplemented!` for any other `out_bits` or `in_bits` value.
fn max_fp_value_sat(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
    let dst = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();

    // Largest value of the requested integer type, expressed as an f64.
    let upper_bound = match (signed, out_bits) {
        (true, 32) => i32::MAX as f64,
        (false, 32) => u32::MAX as f64,
        (true, 64) => i64::MAX as f64,
        (false, 64) => u64::MAX as f64,
        _ => unimplemented!(
            "unexpected {} output size of {} bits",
            if signed { "signed" } else { "unsigned" },
            out_bits
        ),
    };

    // The constant is lowered at the precision of the floating-point input.
    match in_bits {
        32 => lower_constant_f32(self.lower_ctx, dst, upper_bound as f32),
        64 => lower_constant_f64(self.lower_ctx, dst, upper_bound),
        _ => unimplemented!(
            "unexpected input size for max_fp_value_sat: {} (signed: {}, output size: {})",
            in_bits,
            signed,
            out_bits
        ),
    }

    dst.to_reg()
}

fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI {
if ty_bits == 32 {
FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(shift, ty_bits).unwrap())
Expand Down
Loading