diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 32968393af10..569d5c26b865 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1599,6 +1599,21 @@ (rule (rv_max rs1 rs2) (alu_rrr (AluOPRRR.Max) rs1 rs2)) +;; Helper for emitting the `maxu` instruction. +(decl rv_maxu (XReg XReg) XReg) +(rule (rv_maxu rs1 rs2) + (alu_rrr (AluOPRRR.Maxu) rs1 rs2)) + +;; Helper for emitting the `min` instruction. +(decl rv_min (XReg XReg) XReg) +(rule (rv_min rs1 rs2) + (alu_rrr (AluOPRRR.Min) rs1 rs2)) + +;; Helper for emitting the `minu` instruction. +(decl rv_minu (XReg XReg) XReg) +(rule (rv_minu rs1 rs2) + (alu_rrr (AluOPRRR.Minu) rs1 rs2)) + ;; Helper for emitting the `sext.b` instruction. (decl rv_sextb (XReg) XReg) (rule (rv_sextb rs1) @@ -2546,11 +2561,34 @@ (extern constructor gen_stack_addr gen_stack_addr) (decl gen_select_xreg (IntegerCompare XReg XReg) XReg) -(rule (gen_select_xreg c x y) +;; With Zbb, selecting between the two operands of an ordered compare is a single min/max. +(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.UnsignedLessThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_minu x y)) + +(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.SignedLessThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_min x y)) + +(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.UnsignedGreaterThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_maxu x y)) + +(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.SignedGreaterThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_max x y)) + +(rule 0 (gen_select_xreg c x y) (let ((dst WritableReg (temp_writable_xreg)) (_ Unit (emit (MInst.Select dst c x y)))) (writable_reg_to_reg dst))) + + (decl gen_select_vreg (IntegerCompare VReg VReg) VReg) (rule (gen_select_vreg c x y) (let @@ -2649,6 +2687,10 @@ (decl int_compare (IntCC XReg XReg) IntegerCompare) 
(extern constructor int_compare int_compare) +;; Extract the components of an `IntegerCompare`. +(decl int_compare_decompose (IntCC XReg XReg) IntegerCompare) +(extern extractor infallible int_compare_decompose int_compare_decompose) + (decl label_to_br_target (MachLabel) CondBrTarget) (extern constructor label_to_br_target label_to_br_target) (convert MachLabel CondBrTarget label_to_br_target) @@ -2826,17 +2868,6 @@ (i128_sub (value_regs_zero) val)) -;; Selects the greatest of two registers as signed values. -(decl max (Type XReg XReg) XReg) -(rule (max (fits_in_64 (ty_int ty)) x y) - (if-let $true (has_zbb)) - (rv_max x y)) - -(rule (max (fits_in_64 (ty_int ty)) x y) - (if-let $false (has_zbb)) - (gen_select_xreg (cmp_gt x y) x y)) - - ;; Builds an instruction sequence that traps if the comparision succeeds. (decl gen_trapif (IntCC XReg XReg TrapCode) InstOutput) (rule (gen_trapif cc a b trap_code) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 0c6fbf248e87..03ee2a93d234 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -2044,7 +2044,7 @@ (rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x))) (let ((extended XReg (sext x)) (negated XReg (rv_neg extended))) - (max $I64 extended negated))) + (gen_select_xreg (cmp_gt extended negated) extended negated))) ;; For vectors we generate the same code, but with vector instructions ;; we can skip the sign extension, since the vector unit will only process diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 699ed57b0cec..b106cd77f7ed 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -482,6 +482,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> } } + #[inline] + fn int_compare_decompose(&mut self, cmp: IntegerCompare) -> (IntCC, 
XReg, XReg) { + (cmp.kind, self.xreg_new(cmp.rs1), self.xreg_new(cmp.rs2)) + } + #[inline] fn vstate_from_type(&mut self, ty: Type) -> VState { VState::from_type(ty) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 281b665c7eec..e5cb338a40cf 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1669,15 +1669,6 @@ (decl xmi_imm (u32) XmmMemImm) (extern constructor xmi_imm xmi_imm) -;;;; Helpers for Working With Integer Comparison Codes ;;;;;;;;;;;;;;;;;;;;;;;;; -;; - -;; This is a direct import of `IntCC::without_equal`. -;; Get the corresponding IntCC with the equal component removed. -;; For conditions without a zero component, this is a no-op. -(decl intcc_without_eq (IntCC) IntCC) -(extern constructor intcc_without_eq intcc_without_eq) - ;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;; (type RegisterClass diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 99d454a70cd6..5908ad58aa61 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -623,11 +623,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { } } - #[inline] - fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC { - x.without_equal() - } - #[inline] fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC { CC::from_intcc(*intcc) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 26332de5a88b..4353ae388c6d 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -846,6 +846,11 @@ macro_rules! 
isle_common_prelude_methods { cc.complement() } + #[inline] + fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC { + x.without_equal() + } + #[inline] fn floatcc_swap_args(&mut self, cc: &FloatCC) -> FloatCC { cc.swap_args() diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index da37334b1d12..5f389dbc9df4 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -337,6 +337,12 @@ (decl intcc_complement (IntCC) IntCC) (extern constructor intcc_complement intcc_complement) +;; This is a direct import of `IntCC::without_equal`. +;; Get the corresponding IntCC with the equal component removed. +;; For conditions without an equal component, this is a no-op. +(decl pure intcc_without_eq (IntCC) IntCC) +(extern constructor intcc_without_eq intcc_without_eq) + ;; Swap args of a FloatCC flag. (decl floatcc_swap_args (FloatCC) FloatCC) (extern constructor floatcc_swap_args floatcc_swap_args) diff --git a/cranelift/filetests/filetests/isa/riscv64/smax-zbb.clif b/cranelift/filetests/filetests/isa/riscv64/smax-zbb.clif new file mode 100644 index 000000000000..25812fb06058 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/smax-zbb.clif @@ -0,0 +1,135 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zbb + +function %smax_i8(i8, i8) -> i8{ +block0(v0: i8, v1: i8): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; block0: +; sext.b a3,a0 +; sext.b a5,a1 +; max a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x93, 0x16, 0x45, 0x60 +; .byte 0x93, 0x97, 0x45, 0x60 +; .byte 0x33, 0xe5, 0xf6, 0x0a +; ret + +function %smax_i16(i16, i16) -> i16{ +block0(v0: i16, v1: i16): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; block0: +; sext.h a3,a0 +; sext.h a5,a1 +; max a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x93, 0x16, 0x55, 0x60 +; .byte 0x93, 0x97, 0x55, 0x60 +; .byte 0x33, 0xe5, 0xf6, 0x0a +; ret + +function %smax_i32(i32, i32) -> i32{ 
+block0(v0: i32, v1: i32): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; block0: +; sext.w a3,a0 +; sext.w a5,a1 +; max a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sext.w a3, a0 +; sext.w a5, a1 +; .byte 0x33, 0xe5, 0xf6, 0x0a +; ret + +function %smax_i64(i64, i64) -> i64{ +block0(v0: i64, v1: i64): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; block0: +; max a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x33, 0x65, 0xb5, 0x0a +; ret + +function %smax_i128(i128, i128) -> i128{ +block0(v0: i128, v1: i128): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s3,-8(sp) +; add sp,-16 +; block0: +; sgt a5,[a0,a1],[a2,a3]##ty=i128 +; mv a4,a0 +; mv s3,a1 +; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero) +; add sp,+16 +; ld s3,-8(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; sd s3, -8(sp) +; addi sp, sp, -0x10 +; block1: ; offset 0x18 +; blt a3, a1, 0xc +; bne a1, a3, 0x10 +; bgeu a2, a0, 0xc +; addi a5, zero, 1 +; j 8 +; mv a5, zero +; mv a4, a0 +; mv s3, a1 +; beqz a5, 0x10 +; mv a0, a4 +; mv a1, s3 +; j 0xc +; mv a0, a2 +; mv a1, a3 +; addi sp, sp, 0x10 +; ld s3, -8(sp) +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/smin-zbb.clif b/cranelift/filetests/filetests/isa/riscv64/smin-zbb.clif new file mode 100644 index 000000000000..fa67cb0d998d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/smin-zbb.clif @@ -0,0 +1,135 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zbb + +function %smin_i8(i8, i8) -> i8{ +block0(v0: i8, v1: i8): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; block0: +; sext.b a3,a0 +; sext.b a5,a1 +; min a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x93, 0x16, 0x45, 0x60 +; .byte 0x93, 
0x97, 0x45, 0x60 +; .byte 0x33, 0xc5, 0xf6, 0x0a +; ret + +function %smin_i16(i16, i16) -> i16{ +block0(v0: i16, v1: i16): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; block0: +; sext.h a3,a0 +; sext.h a5,a1 +; min a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x93, 0x16, 0x55, 0x60 +; .byte 0x93, 0x97, 0x55, 0x60 +; .byte 0x33, 0xc5, 0xf6, 0x0a +; ret + +function %smin_i32(i32, i32) -> i32{ +block0(v0: i32, v1: i32): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; block0: +; sext.w a3,a0 +; sext.w a5,a1 +; min a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sext.w a3, a0 +; sext.w a5, a1 +; .byte 0x33, 0xc5, 0xf6, 0x0a +; ret + +function %smin_i64(i64, i64) -> i64{ +block0(v0: i64, v1: i64): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; block0: +; min a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x33, 0x45, 0xb5, 0x0a +; ret + +function %smin_i128(i128, i128) -> i128{ +block0(v0: i128, v1: i128): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s3,-8(sp) +; add sp,-16 +; block0: +; slt a5,[a0,a1],[a2,a3]##ty=i128 +; mv a4,a0 +; mv s3,a1 +; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero) +; add sp,+16 +; ld s3,-8(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; sd s3, -8(sp) +; addi sp, sp, -0x10 +; block1: ; offset 0x18 +; blt a1, a3, 0xc +; bne a1, a3, 0x10 +; bgeu a0, a2, 0xc +; addi a5, zero, 1 +; j 8 +; mv a5, zero +; mv a4, a0 +; mv s3, a1 +; beqz a5, 0x10 +; mv a0, a4 +; mv a1, s3 +; j 0xc +; mv a0, a2 +; mv a1, a3 +; addi sp, sp, 0x10 +; ld s3, -8(sp) +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/umax-zbb.clif b/cranelift/filetests/filetests/isa/riscv64/umax-zbb.clif new file mode 100644 index 000000000000..1eb8f37baa6b --- /dev/null 
+++ b/cranelift/filetests/filetests/isa/riscv64/umax-zbb.clif @@ -0,0 +1,139 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zbb + +function %umax_i8(i8, i8) -> i8{ +block0(v0: i8, v1: i8): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; block0: +; andi a3,a0,255 +; andi a5,a1,255 +; maxu a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; andi a3, a0, 0xff +; andi a5, a1, 0xff +; .byte 0x33, 0xf5, 0xf6, 0x0a +; ret + +function %umax_i16(i16, i16) -> i16{ +block0(v0: i16, v1: i16): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; block0: +; zext.h a3,a0 +; zext.h a5,a1 +; maxu a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0xbb, 0x46, 0x05, 0x08 +; .byte 0xbb, 0xc7, 0x05, 0x08 +; .byte 0x33, 0xf5, 0xf6, 0x0a +; ret + +function %umax_i32(i32, i32) -> i32{ +block0(v0: i32, v1: i32): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; block0: +; slli a3,a0,32 +; srli a5,a3,32 +; slli a1,a1,32 +; srli a3,a1,32 +; maxu a0,a5,a3 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a3, a0, 0x20 +; srli a5, a3, 0x20 +; slli a1, a1, 0x20 +; srli a3, a1, 0x20 +; .byte 0x33, 0xf5, 0xd7, 0x0a +; ret + +function %umax_i64(i64, i64) -> i64{ +block0(v0: i64, v1: i64): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; block0: +; maxu a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x33, 0x75, 0xb5, 0x0a +; ret + +function %umax_i128(i128, i128) -> i128{ +block0(v0: i128, v1: i128): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s3,-8(sp) +; add sp,-16 +; block0: +; ugt a5,[a0,a1],[a2,a3]##ty=i128 +; mv a4,a0 +; mv s3,a1 +; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero) +; add sp,+16 +; ld s3,-8(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; sd s3, -8(sp) +; addi sp, sp, -0x10 +; block1: ; offset 0x18 +; bltu 
a3, a1, 0xc +; bne a1, a3, 0x10 +; bgeu a2, a0, 0xc +; addi a5, zero, 1 +; j 8 +; mv a5, zero +; mv a4, a0 +; mv s3, a1 +; beqz a5, 0x10 +; mv a0, a4 +; mv a1, s3 +; j 0xc +; mv a0, a2 +; mv a1, a3 +; addi sp, sp, 0x10 +; ld s3, -8(sp) +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/umin-zbb.clif b/cranelift/filetests/filetests/isa/riscv64/umin-zbb.clif new file mode 100644 index 000000000000..2a2d522b5632 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/umin-zbb.clif @@ -0,0 +1,139 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zbb + +function %umin_i8(i8, i8) -> i8{ +block0(v0: i8, v1: i8): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; block0: +; andi a3,a0,255 +; andi a5,a1,255 +; minu a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; andi a3, a0, 0xff +; andi a5, a1, 0xff +; .byte 0x33, 0xd5, 0xf6, 0x0a +; ret + +function %umin_i16(i16, i16) -> i16{ +block0(v0: i16, v1: i16): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; block0: +; zext.h a3,a0 +; zext.h a5,a1 +; minu a0,a3,a5 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0xbb, 0x46, 0x05, 0x08 +; .byte 0xbb, 0xc7, 0x05, 0x08 +; .byte 0x33, 0xd5, 0xf6, 0x0a +; ret + +function %umin_i32(i32, i32) -> i32{ +block0(v0: i32, v1: i32): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; block0: +; slli a3,a0,32 +; srli a5,a3,32 +; slli a1,a1,32 +; srli a3,a1,32 +; minu a0,a5,a3 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; slli a3, a0, 0x20 +; srli a5, a3, 0x20 +; slli a1, a1, 0x20 +; srli a3, a1, 0x20 +; .byte 0x33, 0xd5, 0xd7, 0x0a +; ret + +function %umin_i64(i64, i64) -> i64{ +block0(v0: i64, v1: i64): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; block0: +; minu a0,a0,a1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x33, 0x55, 0xb5, 0x0a +; ret + +function %umin_i128(i128, i128) -> i128{ +block0(v0: i128, v1: i128): + v2 = umin v0, v1 + return v2 +} 
+ +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s3,-8(sp) +; add sp,-16 +; block0: +; ult a5,[a0,a1],[a2,a3]##ty=i128 +; mv a4,a0 +; mv s3,a1 +; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero) +; add sp,+16 +; ld s3,-8(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; sd s3, -8(sp) +; addi sp, sp, -0x10 +; block1: ; offset 0x18 +; bltu a1, a3, 0xc +; bne a1, a3, 0x10 +; bgeu a0, a2, 0xc +; addi a5, zero, 1 +; j 8 +; mv a5, zero +; mv a4, a0 +; mv s3, a1 +; beqz a5, 0x10 +; mv a0, a4 +; mv a1, s3 +; j 0xc +; mv a0, a2 +; mv a1, a3 +; addi sp, sp, 0x10 +; ld s3, -8(sp) +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret +