diff --git a/build.rs b/build.rs index 0c858e889390..96935c823edb 100644 --- a/build.rs +++ b/build.rs @@ -241,12 +241,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "simd_f64x2_cmp", "simd_f64x2_pmin_pmax", "simd_f64x2_rounding", - "simd_i16x8_cmp", - "simd_i32x4_cmp", "simd_i32x4_trunc_sat_f32x4", "simd_i32x4_trunc_sat_f64x2", - "simd_i64x2_cmp", - "simd_i8x16_cmp", "simd_load", "simd_splat", ] diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 2d0e83eb68e4..ab77ecbeaeb1 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -357,7 +357,14 @@ impl VecAluOpRRR { VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, - VecAluOpRRR::VmsltVX => 0b011011, + VecAluOpRRR::VmseqVV | VecAluOpRRR::VmseqVX => 0b011000, + VecAluOpRRR::VmsneVV | VecAluOpRRR::VmsneVX => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX => 0b011010, + VecAluOpRRR::VmsltVV | VecAluOpRRR::VmsltVX => 0b011011, + VecAluOpRRR::VmsleuVV | VecAluOpRRR::VmsleuVX => 0b011100, + VecAluOpRRR::VmsleVV | VecAluOpRRR::VmsleVX => 0b011101, + VecAluOpRRR::VmsgtuVX => 0b011110, + VecAluOpRRR::VmsgtVX => 0b011111, } } @@ -381,7 +388,13 @@ impl VecAluOpRRR { | VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxVV | VecAluOpRRR::VmergeVVM - | VecAluOpRRR::VrgatherVV => VecOpCategory::OPIVV, + | VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsltuVV + | VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddWV | VecAluOpRRR::VwadduVV @@ -427,8 +440,15 @@ impl VecAluOpRRR { | VecAluOpRRR::VmaxVX | VecAluOpRRR::VslidedownVX | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmsltVX - | VecAluOpRRR::VrgatherVX => VecOpCategory::OPIVX, + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, VecAluOpRRR::VfaddVV | VecAluOpRRR::VfsubVV | VecAluOpRRR::VfmulVV @@ -522,6 +542,12 @@ impl VecAluOpRRImm5 { VecAluOpRRImm5::VsaddVI => 0b100001, VecAluOpRRImm5::VrgatherVI => 0b001100, VecAluOpRRImm5::VmvrV => 0b100111, + VecAluOpRRImm5::VmseqVI => 0b011000, + VecAluOpRRImm5::VmsneVI => 0b011001, + VecAluOpRRImm5::VmsleuVI => 0b011100, + VecAluOpRRImm5::VmsleVI => 0b011101, + VecAluOpRRImm5::VmsgtuVI => 0b011110, + VecAluOpRRImm5::VmsgtVI => 0b011111, } } @@ -541,7 +567,13 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VsadduVI | VecAluOpRRImm5::VsaddVI | VecAluOpRRImm5::VrgatherVI - | VecAluOpRRImm5::VmvrV => VecOpCategory::OPIVI, + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, } } @@ -561,7 +593,13 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VxorVI | VecAluOpRRImm5::VmergeVIM | VecAluOpRRImm5::VsadduVI - | VecAluOpRRImm5::VsaddVI => false, + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle 
b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index fa8d08564b48..256f41ceab32 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -128,6 +128,13 @@ (VredminuVS) (VrgatherVV) (VcompressVM) + (VmseqVV) + (VmsneVV) + (VmsltuVV) + (VmsltVV) + (VmsleuVV) + (VmsleVV) + ;; Vector-Scalar Opcodes (VaddVX) @@ -169,7 +176,14 @@ (VmergeVXM) (VfmergeVFM) (VrgatherVX) + (VmseqVX) + (VmsneVX) + (VmsltuVX) (VmsltVX) + (VmsleuVX) + (VmsleVX) + (VmsgtuVX) + (VmsgtVX) )) @@ -199,6 +213,12 @@ ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... ;; The immediate field specifies how many registers should be copied. (VmvrV) + (VmseqVI) + (VmsneVI) + (VmsleuVI) + (VmsleVI) + (VmsgtuVI) + (VmsgtVI) )) ;; Imm only ALU Ops @@ -969,11 +989,126 @@ (rule (rv_vcompress_vm vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) -;; Helper for emitting the `vmslt.vx` (Vector Mask Set Less Than) instruction. +;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmseq_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsne_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction. +(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmslt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction. 
(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg) (rule (rv_vmslt_vx vs2 vs1 mask vstate) (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate)) +;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsleu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsle_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgtu.vv` (Vector Mask Set If Greater Than, Unsigned) instruction. +;; This is an alias for `vmsltu.vv` with the operands inverted. +(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. +;; This is an alias for `vmslt.vv` with the operands inverted. +(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgt_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. +;; This is an alias for `vmsleu.vv` with the operands inverted. 
+(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. +;; This is an alias for `vmsle.vv` with the operands inverted. +(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) + ;; Helper for emitting the `vzext.vf2` instruction. ;; Zero-extend SEW/2 source to SEW destination (decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) @@ -1078,3 +1213,120 @@ (rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) (if-let amt (u64_udiv (ty_lane_count ty) 2)) (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) + + +;; Expands a mask into SEW wide lanes. Enabled lanes are set to all ones, disabled +;; lanes are set to all zeros. +(decl gen_expand_mask (Type VReg) VReg) +(rule (gen_expand_mask ty mask) + (if-let zero (imm5_from_i8 0)) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) + + +;; Builds a vector mask corresponding to the IntCC operation. +(decl gen_icmp_mask (Type IntCC Value Value) VReg) + +;; IntCC.Equal + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (rv_vmseq_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) + (rv_vmseq_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) + (rv_vmseq_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (replicated_imm5 y)) + (rv_vmseq_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (replicated_imm5 x) y) + (rv_vmseq_vi y x (unmasked) ty)) + +;; IntCC.NotEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (rv_vmsne_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) + (rv_vmsne_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) + (rv_vmsne_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (replicated_imm5 y)) + (rv_vmsne_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (replicated_imm5 x) y) + (rv_vmsne_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (rv_vmsltu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) + (rv_vmsltu_vx x y (unmasked) ty)) + +;; IntCC.SignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) + (rv_vmslt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x (splat y)) + (rv_vmslt_vx x y (unmasked) ty)) + +;; IntCC.UnsignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (rv_vmsleu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) + (rv_vmsleu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsleu_vi x y (unmasked) ty)) + +;; IntCC.SignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) 
(IntCC.SignedLessThanOrEqual) x y) + (rv_vmsle_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) + (rv_vmsle_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsle_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (rv_vmsgtu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) + (rv_vmsgtu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgtu_vi x y (unmasked) ty)) + +;; IntCC.SignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (rv_vmsgt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) + (rv_vmsgt_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgt_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_vmsgeu_vv x y (unmasked) ty)) + +;; IntCC.SignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (rv_vmsge_vv x y (unmasked) ty)) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 6978d0785d8e..3c578e2abdef 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1454,10 +1454,13 @@ result)) ;;;;; Rules for `icmp`;;;;;;;;; -(rule - (lower (icmp cc x @ (value_type ty) y)) +(rule 0 (lower (icmp cc x @ (value_type (ty_int ty)) y)) (lower_icmp cc x y ty)) +(rule 1 (lower (icmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_icmp_mask ty cc x y))) + + ;;;;; Rules for `fcmp`;;;;;;;;; (rule (lower (fcmp cc x @ (value_type ty) y)) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif new file mode 100644 index 000000000000..3f6498ba6d38 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_eq_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) 
+; addi sp, sp, 0x10 +; ret + +function %simd_icmp_eq_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_eq_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_eq_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + + + + + +function %simd_icmp_splat_rhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp eq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd 
fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp eq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x62 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 
0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x62 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif new file mode 100644 index 000000000000..e53a2b9a9b34 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif @@ -0,0 +1,364 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ne_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ne_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ne_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 
8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ne_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 
+; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x66 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x66 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif new file mode 100644 index 000000000000..2980f22d21f9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif @@ -0,0 +1,372 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_sge_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + 
+function %simd_icmp_sge_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sge_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sge_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_sge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; 
block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_sge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_sge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_sge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, 
#vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif new file mode 100644 index 000000000000..7719b148c7cb --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_sgt_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sgt_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sgt_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 
#avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sgt_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_sgt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sgt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgt.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_sgt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sgt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; 
offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_sgt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sgt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgt.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x7e +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_sgt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sgt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif new file mode 100644 index 000000000000..fc1a573ce8d8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_sle_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; 
ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sle_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sle_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sle_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 
0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_sle_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sle v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_sle_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sle v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_sle_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sle v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x76 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_sle_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = 
iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sle v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif new file mode 100644 index 000000000000..b5f9ce01942b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif @@ -0,0 +1,370 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_slt_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_slt_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + 
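
Every expectation in these filetests pins the same lowering shape: a vector mask compare writes the per-lane result into `v0`, and `vmv.v.i`/`vmerge.vim` then widen that mask into all-ones / all-zero lanes. As a quick sanity check of the expected lane values, here is a minimal scalar Rust model of that pattern for `icmp slt` on `i64x2`; it is an illustrative sketch only, not part of the diff, and the function name is made up.

```rust
// Scalar model of the riscv64 lowering checked above:
//   vmslt.vv  v0,lhs,rhs   -> per-lane mask bit (signed less-than)
//   vmv.v.i   v8,0         -> zero vector
//   vmerge.vim v10,v8,-1,v0.t -> -1 where the mask bit is set, 0 elsewhere
fn icmp_slt_i64x2(lhs: [i64; 2], rhs: [i64; 2]) -> [i64; 2] {
    let mut out = [0i64; 2];
    for lane in 0..2 {
        // vmslt.vv: mask bit set when lhs < rhs (signed compare).
        let mask = lhs[lane] < rhs[lane];
        // vmerge.vim over a zero vector: expand the mask bit to -1 or 0.
        out[lane] = if mask { -1 } else { 0 };
    }
    out
}

fn main() {
    // Lane 0 satisfies the compare, lane 1 does not.
    assert_eq!(icmp_slt_i64x2([1, 5], [2, 5]), [-1, 0]);
    println!("lowering model matches the expected lane values");
}
```

The same model applies to the other condition codes in this patch by swapping the lane predicate (`<=`, unsigned `<`, and so on) or the operand order, which is exactly how the `uge`/`ugt` tests below reuse `vmsleu.vv`/`vmsltu.vv` with swapped sources.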
+function %simd_icmp_slt_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_slt_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_slt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp slt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_slt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp slt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, 
#vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_slt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp slt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a4,10 +; vmslt.vx v0,v1,a4 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a4, zero, 0xa +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x17, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_slt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp slt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif new file mode 100644 index 000000000000..92d0e41db531 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif @@ -0,0 +1,372 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_uge_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv 
fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_uge_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_uge_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_uge_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) 
+; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_uge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_uge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_uge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 
8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_uge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif new file mode 100644 index 000000000000..c1de9d668a70 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ugt_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ugt_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; 
Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ugt_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ugt_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_ugt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgtu.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd 
+; .byte 0x57, 0x40, 0x15, 0x7a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_ugt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_ugt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgtu.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x7a +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_ugt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git 
a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif new file mode 100644 index 000000000000..d1ed0549676e --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ule_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ule_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ule_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 
0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ule_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_ule_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ule v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_ule_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_ule_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ule v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp 
+; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x72 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_ule_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif new file mode 100644 index 000000000000..ceda96cc01ba --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif @@ -0,0 +1,370 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ult_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ult_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; 
vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ult_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ult_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_ult_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, 
#vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_ult_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_ult_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a4,10 +; vmsltu.vx v0,v1,a4 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a4, zero, 0xa +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x17, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_ult_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; 
.byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif b/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif index f81d29cd30be..203b9bbc0c79 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif @@ -5,6 +5,7 @@ target x86_64 target x86_64 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_eq_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif index 0d1a548d2e71..3a28260e5dde 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ne_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif index b6e8cdc1c81d..72a3e22b7e73 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_sge_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif index b6661c0fe374..56ac1fa63240 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_sgt_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif index d5c3acf56773..cb281e9a0eb5 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_sle_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif index 678e755f58c1..9bac309adc2c 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_slt_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif b/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif index b1095c4ebde9..074ef56ecbe3 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif +++ 
b/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_uge_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif index 708e2bac71fd..04853480ad50 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ugt_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif index 6b02fae5d035..699600c2c66d 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ule_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif index 35ca0b7443fd..c936fe7a8d84 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ult_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16):