diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 6289ef2b147f..edfdb5fe7878 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -326,8 +326,8 @@ (VecAluRRR (op VecAluOpRRR) (vd WritableReg) - (vs1 Reg) (vs2 Reg) + (vs1 Reg) (vstate VState)) (VecSetState diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 38d73f2dbfd2..7c4971bca36f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -50,7 +50,7 @@ pub fn encode_valu( ) -> u32 { let funct6 = funct6 & 0b111111; let vm = vm & 0b1; - let funct7 = (funct6 << 6) | vm; + let funct7 = (funct6 << 1) | vm; encode_r_type(opcode, vd, funct3, vs1, vs2, funct7) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 583896a02f02..1d17f7c73964 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -214,27 +214,42 @@ impl fmt::Display for VState { impl VecAluOpRRR { pub fn opcode(&self) -> u32 { - match self { - VecAluOpRRR::Vadd => 0x57, - } + // Vector Opcode + 0x57 } pub fn funct3(&self) -> u32 { match self { - VecAluOpRRR::Vadd => 0b000, + // OPIVV + VecAluOpRRR::Vadd + | VecAluOpRRR::Vsub + | VecAluOpRRR::Vand + | VecAluOpRRR::Vor + | VecAluOpRRR::Vxor => 0b000, + // OPIMV + VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010, } } pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { VecAluOpRRR::Vadd => 0b000000, + VecAluOpRRR::Vsub => 0b000010, + VecAluOpRRR::Vmul => 0b100101, + VecAluOpRRR::Vmulh => 0b100111, + VecAluOpRRR::Vmulhu => 0b100100, + VecAluOpRRR::Vand => 0b001001, + VecAluOpRRR::Vor => 0b001010, + VecAluOpRRR::Vxor => 0b001011, } } } impl fmt::Display for VecAluOpRRR { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - VecAluOpRRR::Vadd => write!(f, "vadd.vv"), - } + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + s.push_str(".vv"); + f.write_str(&s) } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 729cbcf7545d..1558846790cd 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -59,6 +59,13 @@ ;; Register to Register ALU Ops (type VecAluOpRRR (enum (Vadd) + (Vsub) + (Vmul) + (Vmulh) + (Vmulhu) + (Vand) + (Vor) + (Vxor) )) @@ -138,3 +145,38 @@ (decl rv_vadd_vv (Reg Reg VState) Reg) (rule (rv_vadd_vv vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate)) + +;; Helper for emitting the `vsub.vv` instruction. +(decl rv_vsub_vv (Reg Reg VState) Reg) +(rule (rv_vsub_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate)) + +;; Helper for emitting the `vmul.vv` instruction. +(decl rv_vmul_vv (Reg Reg VState) Reg) +(rule (rv_vmul_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate)) + +;; Helper for emitting the `vmulh.vv` instruction. +(decl rv_vmulh_vv (Reg Reg VState) Reg) +(rule (rv_vmulh_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate)) + +;; Helper for emitting the `vmulhu.vv` instruction. +(decl rv_vmulhu_vv (Reg Reg VState) Reg) +(rule (rv_vmulhu_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate)) + +;; Helper for emitting the `vand.vv` instruction. +(decl rv_vand_vv (Reg Reg VState) Reg) +(rule (rv_vand_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate)) + +;; Helper for emitting the `vor.vv` instruction. +(decl rv_vor_vv (Reg Reg VState) Reg) +(rule (rv_vor_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate)) + +;; Helper for emitting the `vxor.vv` instruction. +(decl rv_vxor_vv (Reg Reg VState) Reg) +(rule (rv_vxor_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate)) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 0a245f025a2d..d6275dac8c50 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -112,15 +112,19 @@ ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Base case, simply subtracting things in registers. -(rule -2 (lower (has_type (fits_in_64 ty) (isub x y))) +(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) (rv_sub x y)) -(rule -1 (lower (has_type (fits_in_32 ty) (isub x y))) +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y))) (rv_subw x y)) -(rule (lower (has_type $I128 (isub x y))) +(rule 2 (lower (has_type $I128 (isub x y))) (i128_sub x y)) +;; SIMD Vectors +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y))) + (rv_vsub_vv x y ty)) + ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. @@ -129,21 +133,14 @@ ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -2 (lower (has_type (fits_in_64 ty) (imul x y))) +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) (rv_mul x y)) -(rule -1 (lower (has_type (fits_in_32 ty) (imul x y))) - (rv_mulw x y)) - -;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 ty) (smulhi x y))) - (lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) -;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 ty) (umulhi x y))) - (lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y))) + (rv_mulw x y)) ;; for I128 -(rule (lower (has_type $I128 (imul x y))) +(rule 2 (lower (has_type $I128 (imul x y))) (let ((x_regs ValueRegs x) (x_lo Reg (value_regs_get x_regs 0)) @@ -169,6 +166,22 @@ (dst_lo Reg (madd x_lo y_lo (zero_reg)))) (value_regs dst_lo dst_hi))) +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y))) + (rv_vmul_vv x y ty)) + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) + (lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y))) + (rv_vmulh_vv x y ty)) + +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y))) + (lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y))) + (rv_vmulhu_vv x y ty)) ;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -277,6 +290,10 @@ (value_regs low high))) +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (band x y))) + (rv_vand_vv x y ty)) + + ;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (ty_int ty) (bor x y))) (gen_or ty x y)) @@ -320,6 +337,8 @@ (high Reg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) (value_regs low high))) +(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bor x y))) + (rv_vor_vv x y ty)) ;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y))) @@ -341,6 +360,8 @@ (rule (lower (has_type $F64 (bxor x y))) (lower_float_binary (AluOPRRR.Xor) x y $F64)) +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y))) + (rv_vxor_vv x y ty)) ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (ty_int ty) (bnot x))) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-band.clif b/cranelift/filetests/filetests/isa/riscv64/simd-band.clif new file mode 100644 index 000000000000..6726b53d831f --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-band.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %band_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = band v0, v1 + return v2 +} + +; VCode: +; block0: +; vand.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x26 +; ret + +function %band_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = band v0, v1 + return v2 +} + +; VCode: +; block0: +; vand.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x26 +; ret + +function %band_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = band v0, v1 + return v2 +} + +; VCode: +; block0: +; vand.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x26 +; ret + +function %band_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = band v0, v1 + return v2 +} + +; VCode: +; block0: +; vand.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x26 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-bor.clif b/cranelift/filetests/filetests/isa/riscv64/simd-bor.clif new file mode 100644 index 000000000000..6f6a191b62ce --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-bor.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %bor_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = bor v0, v1 + return v2 +} + +; VCode: +; block0: +; vor.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x2a +; ret + +function %bor_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = bor v0, v1 + return v2 +} + +; VCode: +; block0: +; vor.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x2a +; ret + +function %bor_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = bor v0, v1 + return v2 +} + +; VCode: +; block0: +; vor.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x2a +; ret + +function %bor_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = bor v0, v1 + return v2 +} + +; VCode: +; block0: +; vor.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x2a +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-bxor.clif b/cranelift/filetests/filetests/isa/riscv64/simd-bxor.clif new file mode 100644 index 000000000000..de505094fd8f --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-bxor.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %bxor_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = bxor v0, v1 + return v2 +} + +; VCode: +; block0: +; vxor.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x2e +; ret + +function %bxor_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = bxor v0, v1 + return v2 +} + +; VCode: +; block0: +; vxor.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x2e +; ret + +function %bxor_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = bxor v0, v1 + return v2 +} + +; VCode: +; block0: +; vxor.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x2e +; ret + +function %bxor_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = bxor v0, v1 + return v2 +} + +; VCode: +; block0: +; vxor.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x2e +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif index 74b45c9ed03d..b026de76984a 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif @@ -12,13 +12,13 @@ block0(v0:i64x4, v1:i64x4): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=4, #vtype=(e64, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=4, #vtype=(e64, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x82, 0xcd -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret function %iadd_i64x8(i64x8, i64x8) -> i64x8 { @@ -29,12 +29,12 @@ block0(v0:i64x8, v1:i64x8): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=8, #vtype=(e64, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=8, #vtype=(e64, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x84, 0xcd -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif index efeccdf1b50c..edab35d60a1c 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif @@ -11,13 +11,13 @@ block0(v0:i8x8, v1:i8x8): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=8, #vtype=(e8, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=8, #vtype=(e8, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x04, 0xcc -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret function %iadd_i16x4(i16x4, i16x4) -> i16x4 { @@ -28,13 +28,13 @@ block0(v0:i16x4, v1:i16x4): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=4, #vtype=(e16, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=4, #vtype=(e16, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x82, 0xcc -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret function %iadd_i32x2(i32x2, i32x2) -> i32x2 { @@ -45,12 +45,12 @@ block0(v0:i32x2, v1:i32x2): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=2, #vtype=(e32, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=2, #vtype=(e32, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x01, 0xcd -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif index 1fb20ca92f18..6b13f4f050c8 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif @@ -11,13 +11,13 @@ block0(v0: i8x16, v1: i8x16): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x08, 0xcc -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret function %iadd_i16x8(i16x8, i16x8) -> i16x8 { @@ -28,13 +28,13 @@ block0(v0: i16x8, v1: i16x8): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=8, #vtype=(e16, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x84, 0xcc -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret function %iadd_i32x4(i32x4, i32x4) -> i32x4 { @@ -45,13 +45,13 @@ block0(v0: i32x4, v1: i32x4): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=4, #vtype=(e32, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x02, 0xcd -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret function %iadd_i64x2(i64x2, i64x2) -> i64x2 { @@ -62,12 +62,12 @@ block0(v0: i64x2, v1: i64x2): ; VCode: ; block0: -; vadd.vv v10,v11,v10 #avl=2, #vtype=(e64, m1, ta, ma) +; vadd.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x81, 0xcd -; .byte 0x57, 0x05, 0xb5, 0x02 +; .byte 0x57, 0x85, 0xa5, 0x02 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-imul.clif b/cranelift/filetests/filetests/isa/riscv64/simd-imul.clif new file mode 100644 index 000000000000..04b85462cf38 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-imul.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %imul_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = imul v0, v1 + return v2 +} + +; VCode: +; block0: +; vmul.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0xa5, 0xa5, 0x96 +; ret + +function %imul_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = imul v0, v1 + return v2 +} + +; VCode: +; block0: +; vmul.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0xa5, 0xa5, 0x96 +; ret + +function %imul_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = imul v0, v1 + return v2 +} + +; VCode: +; block0: +; vmul.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xa5, 0xa5, 0x96 +; ret + +function %imul_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = imul v0, v1 + return v2 +} + +; VCode: +; block0: +; vmul.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xa5, 0xa5, 0x96 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif b/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif new file mode 100644 index 000000000000..536045967709 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %isub_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = isub v0, v1 + return v2 +} + +; VCode: +; block0: +; vsub.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x0a +; ret + +function %isub_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = isub v0, v1 + return v2 +} + +; VCode: +; block0: +; vsub.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x85, 0xa5, 0x0a +; ret + +function %isub_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = isub v0, v1 + return v2 +} + +; VCode: +; block0: +; vsub.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x0a +; ret + +function %isub_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = isub v0, v1 + return v2 +} + +; VCode: +; block0: +; vsub.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x85, 0xa5, 0x0a +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-smulhi.clif b/cranelift/filetests/filetests/isa/riscv64/simd-smulhi.clif new file mode 100644 index 000000000000..87e9b3716cbc --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-smulhi.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %smulhi_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = smulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulh.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0xa5, 0xa5, 0x9e +; ret + +function %smulhi_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = smulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulh.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0xa5, 0xa5, 0x9e +; ret + +function %smulhi_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = smulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulh.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xa5, 0xa5, 0x9e +; ret + +function %smulhi_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = smulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulh.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xa5, 0xa5, 0x9e +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-umulhi.clif b/cranelift/filetests/filetests/isa/riscv64/simd-umulhi.clif new file mode 100644 index 000000000000..b013f71c7921 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-umulhi.clif @@ -0,0 +1,73 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %umulhi_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = umulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulhu.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0xa5, 0xa5, 0x92 +; ret + +function %umulhi_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = umulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulhu.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0xa5, 0xa5, 0x92 +; ret + +function %umulhi_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = umulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulhu.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xa5, 0xa5, 0x92 +; ret + +function %umulhi_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = umulhi v0, v1 + return v2 +} + +; VCode: +; block0: +; vmulhu.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xa5, 0xa5, 0x92 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-vstate.clif b/cranelift/filetests/filetests/isa/riscv64/simd-vstate.clif index dcc796069350..0eff3a179724 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-vstate.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-vstate.clif @@ -45,24 +45,24 @@ block2(v6: i8x16, v7: i8x16): ; VCode: ; block0: -; vadd.vv v5,v11,v10 #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vv v5,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma) ; j label1 ; block1: -; vadd.vv v6,v5,v11 #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vv v6,v11,v5 #avl=16, #vtype=(e8, m1, ta, ma) ; j label2 ; block2: -; vadd.vv v10,v6,v5 #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vv v10,v5,v6 #avl=16, #vtype=(e8, m1, ta, ma) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x08, 0xcc -; .byte 0xd7, 0x02, 0xb5, 0x02 +; .byte 0xd7, 0x82, 0xa5, 0x02 ; block1: ; offset 0x8 ; .byte 0x57, 0x70, 0x08, 0xcc -; .byte 0x57, 0x83, 0x55, 0x02 +; .byte 0x57, 0x83, 0xb2, 0x02 ; block2: ; offset 0x10 ; .byte 0x57, 0x70, 0x08, 0xcc -; .byte 0x57, 0x85, 0x62, 0x02 +; .byte 0x57, 0x05, 0x53, 0x02 ; ret diff --git a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif index 59f80c9602d8..1783dea3b396 100644 --- a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif @@ -8,37 +8,6 @@ target x86_64 target x86_64 skylake -function %isub_i32x4(i32x4, i32x4) -> i32x4 { -block0(v0: i32x4, v1: i32x4): - v2 = isub v0, v1 - return v2 -} -; run: %isub_i32x4([1 1 1 1], [1 2 3 4]) == [0 -1 -2 -3] - - -function %imul_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2, v1: i64x2): - v2 = imul v0, v1 - return v2 -} -; run: %imul_i64x2([0 2], [0 2]) == [0 4] - -function %imul_i32x4(i32x4, i32x4) -> i32x4 { -block0(v0: i32x4, v1: i32x4): - v2 = imul v0, v1 - return v2 -} -; run: %imul_i32x4([-1 0 1 0x80_00_00_01], [2 2 2 2]) == [-2 0 2 2] -; Note above how bits are truncated: 0x80_00_00_01 * 2 == 0x1_00_00_00_02, but -; the leading 1 is dropped. - -function %imul_i16x8(i16x8, i16x8) -> i16x8 { -block0(v0: i16x8, v1: i16x8): - v2 = imul v0, v1 - return v2 -} -; run: %imul_i16x8([-1 0 1 0x7f_ff 0 0 0 0], [2 2 2 2 0 0 0 0]) == [-2 0 2 0xff_fe 0 0 0 0] - function %sadd_sat_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): v2 = sadd_sat v0, v1 diff --git a/cranelift/filetests/filetests/runtests/simd-band.clif b/cranelift/filetests/filetests/runtests/simd-band.clif new file mode 100644 index 000000000000..2bfe927f3e22 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-band.clif @@ -0,0 +1,47 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 has_sse41=false +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + + +function %band_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0:i8x16, v1:i8x16): + v2 = band v0, v1 + return v2 +} +; run: %band_i8x16([0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10 0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF], [0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF 0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10]) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] +; run: %band_i8x16([0xFE 0xEE 0xFF 0xFF 0xFE 0xEE 0xFF 0xFF 0xF1 0xFF 0xFE 0xFE 0xF1 0xFF 0xFE 0xFE], [0xDF 0xDB 0xFF 0xFF 0xDF 0xDB 0xFF 0xFF 0xCE 0xFF 0xEF 0xEF 0xCE 0xFF 0xEF 0xEF]) == [0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xC0 0xFF 0xEE 0xEE 0xC0 0xFF 0xEE 0xEE] + + +function %band_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0:i16x8, v1:i16x8): + v2 = band v0, v1 + return v2 +} +; run: %band_i16x8([0xFEDC 0xBA98 0x7654 0x3210 0x0123 0x4567 0x89AB 0xCDEF], [0x0123 0x4567 0x89AB 0xCDEF 0xFEDC 0xBA98 0x7654 0x3210]) == [0 0 0 0 0 0 0 0] +; run: %band_i16x8([0xFEEE 0xFFFF 0xFEEE 0xFFFF 0xF1FF 0xFEFE 0xF1FF 0xFEFE], [0xDFDB 0xFFFF 0xDFDB 0xFFFF 0xCEFF 0xEFEF 0xCEFF 0xEFEF]) == [0xDECA 0xFFFF 0xDECA 0xFFFF 0xC0FF 0xEEEE 0xC0FF 0xEEEE] + + +function %band_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0:i32x4, v1:i32x4): + v2 = band v0, v1 + return v2 +} +; run: %band_i32x4([0xFEDCBA98 0x76543210 0x01234567 0x89ABCDEF], [0x01234567 0x89ABCDEF 0xFEDCBA98 0x76543210]) == [0 0 0 0] +; run: %band_i32x4([0xFEEEFFFF 0xFEEEFFFF 0xF1FFFEFE 0xF1FFFEFE], [0xDFDBFFFF 0xDFDBFFFF 0xCEFFEFEF 0xCEFFEFEF]) == [0xDECAFFFF 0xDECAFFFF 0xC0FFEEEE 0xC0FFEEEE] + + + +function %band_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0:i64x2, v1:i64x2): + v2 = band v0, v1 + return v2 +} +; run: %band_i64x2([0xFEDCBA9876543210 0x0123456789ABCDEF], [0x0123456789ABCDEF 0xFEDCBA9876543210]) == [0 0] +; run: %band_i64x2([0xFEEEFFFFFEEEFFFF 0xF1FFFEFEF1FFFEFE], [0xDFDBFFFFDFDBFFFF 0xCEFFEFEFCEFFEFEF]) == [0xDECAFFFFDECAFFFF 0xC0FFEEEEC0FFEEEE] + diff --git a/cranelift/filetests/filetests/runtests/simd-bor.clif b/cranelift/filetests/filetests/runtests/simd-bor.clif new file mode 100644 index 000000000000..15fe37de9204 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-bor.clif @@ -0,0 +1,45 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 has_sse41=false +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + + +function %bor_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0:i8x16, v1:i8x16): + v2 = bor v0, v1 + return v2 +} +; run: %bor_i8x16([0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10 0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF], [0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF 0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10]) == [0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF] +; run: %bor_i8x16([0x8A 0x8A 0xAA 0xAA 0x8A 0x8A 0xAA 0xAA 0x8A 0x8A 0xAA 0xAA 0x8A 0x8A 0xAA 0xAA], [0x54 0x40 0x55 0x55 0x54 0x40 0x55 0x55 0x54 0x40 0x55 0x55 0x54 0x40 0x55 0x55]) == [0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF] + + +function %bor_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0:i16x8, v1:i16x8): + v2 = bor v0, v1 + return v2 +} +; run: %bor_i16x8([0xFEDC 0xBA98 0x7654 0x3210 0x0123 0x4567 0x89AB 0xCDEF], [0x0123 0x4567 0x89AB 0xCDEF 0xFEDC 0xBA98 0x7654 0x3210]) == [0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF] +; run: %bor_i16x8([0x8A8A 0xAAAA 0x8A8A 0xAAAA 0x8A8A 0xAAAA 0x8A8A 0xAAAA], [0x5440 0x5555 0x5440 0x5555 0x5440 0x5555 0x5440 0x5555]) == [0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF] + + +function %bor_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0:i32x4, v1:i32x4): + v2 = bor v0, v1 + return v2 +} +; run: %bor_i32x4([0xFEDCBA98 0x76543210 0x01234567 0x89ABCDEF], [0x01234567 0x89ABCDEF 0xFEDCBA98 0x76543210]) == [0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF] +; run: %bor_i32x4([0x8A8AAAAA 0x8A8AAAAA 0x8A8AAAAA 0x8A8AAAAA], [0x54405555 0x54405555 0x54405555 0x54405555]) == [0xDECAFFFF 0xDECAFFFF 0xDECAFFFF 0xDECAFFFF] + + +function %bor_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0:i64x2, v1:i64x2): + v2 = bor v0, v1 + return v2 +} +; run: %bor_i64x2([0xFEDCBA9876543210 0x0123456789ABCDEF], [0x0123456789ABCDEF 0xFEDCBA9876543210]) == [0xFFFFFFFFFFFFFFFF 0xFFFFFFFFFFFFFFFF] +; run: %bor_i64x2([0x8A8AAAAA8A8AAAAA 0x8A8AAAAA8A8AAAAA], [0x5440555554405555 0x5440555554405555]) == [0xDECAFFFFDECAFFFF 0xDECAFFFFDECAFFFF] diff --git a/cranelift/filetests/filetests/runtests/simd-bxor.clif b/cranelift/filetests/filetests/runtests/simd-bxor.clif new file mode 100644 index 000000000000..a2cce79362b6 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-bxor.clif @@ -0,0 +1,45 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 has_sse41=false +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + + +function %bxor_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0:i8x16, v1:i8x16): + v2 = bxor v0, v1 + return v2 +} +; run: %bxor_i8x16([0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10 0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF], [0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF 0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10]) == [0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF] +; run: %bxor_i8x16([0x94 0x40 0xA0 0x7D 0x94 0x40 0xA0 0x7D 0x94 0x40 0xA0 0x7D 0x94 0x40 0xA0 0x7D], [0x4A 0x8A 0x5F 0x82 0x4A 0x8A 0x5F 0x82 0x4A 0x8A 0x5F 0x82 0x4A 0x8A 0x5F 0x82]) == [0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF] + + +function %bxor_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0:i16x8, v1:i16x8): + v2 = bxor v0, v1 + return v2 +} +; run: %bxor_i16x8([0xFEDC 0xBA98 0x7654 0x3210 0x0123 0x4567 0x89AB 0xCDEF], [0x0123 0x4567 0x89AB 0xCDEF 0xFEDC 0xBA98 0x7654 0x3210]) == [0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF] +; run: %bxor_i16x8([0x9440 0xA07D 0x9440 0xA07D 0x9440 0xA07D 0x9440 0xA07D], [0x4A8A 0x5F82 0x4A8A 0x5F82 0x4A8A 0x5F82 0x4A8A 0x5F82]) == [0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF] + + +function %bxor_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0:i32x4, v1:i32x4): + v2 = bxor v0, v1 + return v2 +} +; run: %bxor_i32x4([0xFEDCBA98 0x76543210 0x01234567 0x89ABCDEF], [0x01234567 0x89ABCDEF 0xFEDCBA98 0x76543210]) == [0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF] +; run: %bxor_i32x4([0x9440A07D 0x9440A07D 0x9440A07D 0x9440A07D], [0x4A8A5F82 0x4A8A5F82 0x4A8A5F82 0x4A8A5F82]) == [0xDECAFFFF 0xDECAFFFF 0xDECAFFFF 0xDECAFFFF] + + +function %bxor_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0:i64x2, v1:i64x2): + v2 = bxor v0, v1 + return v2 +} +; run: %bxor_i64x2([0xFEDCBA9876543210 0x0123456789ABCDEF], [0x0123456789ABCDEF 0xFEDCBA9876543210]) == [0xFFFFFFFFFFFFFFFF 0xFFFFFFFFFFFFFFFF] +; run: %bxor_i64x2([0x9440A07D9440A07D 0x9440A07D9440A07D], [0x4A8A5F824A8A5F82 0x4A8A5F824A8A5F82]) == [0xDECAFFFFDECAFFFF 0xDECAFFFFDECAFFFF] diff --git a/cranelift/filetests/filetests/runtests/simd-imul-i8x16.clif b/cranelift/filetests/filetests/runtests/simd-imul-i8x16.clif new file mode 100644 index 000000000000..ffb336e525c6 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-imul-i8x16.clif @@ -0,0 +1,13 @@ +test interpret +test run +target aarch64 +target s390x +target riscv64 has_v + + +function %imul_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0:i8x16, v1:i8x16): + v2 = imul v0, v1 + return v2 +} +; run: %imul_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [1 -2 3 -4 5 -6 7 -8 9 -10 11 -12 -13 14 -15 16]) == [1 -4 9 -16 25 -36 49 -64 81 -100 121 112 87 -60 31 0] diff --git a/cranelift/filetests/filetests/runtests/simd-imul.clif b/cranelift/filetests/filetests/runtests/simd-imul.clif new file mode 100644 index 000000000000..0580de3d53f7 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-imul.clif @@ -0,0 +1,38 @@ +test interpret +test run +target aarch64 +target s390x +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + +function %imul_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0:i16x8, v1:i16x8): + v2 = imul v0, v1 + return v2 +} +; run: %imul_i16x8([1 2 3 4 5 6 7 8], [1 -2 3 -4 5 -6 7 -8]) == [1 -4 9 -16 25 -36 49 -64] +; run: %imul_i16x8([-1 0 1 0x7f_ff 0 0 0 0], [2 2 2 2 0 0 0 0]) == [-2 0 2 0xff_fe 0 0 0 0] + + +function %imul_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0:i32x4, v1:i32x4): + v2 = imul v0, v1 + return v2 +} +; run: %imul_i32x4([1 2 3 4], [1 -2 3 -4]) == [1 -4 9 -16] + +; run: %imul_i32x4([-1 0 1 0x80_00_00_01], [2 2 2 2]) == [-2 0 2 2] +; Note above how bits are truncated: 0x80_00_00_01 * 2 == 0x1_00_00_00_02, but +; the leading 1 is dropped. + + + +function %imul_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0:i64x2, v1:i64x2): + v2 = imul v0, v1 + return v2 +} +; run: %imul_i64x2([1 1], [1 2]) == [1 2] +; run: %imul_i64x2([2 2], [-1 5]) == [-2 10] diff --git a/cranelift/filetests/filetests/runtests/simd-isub.clif b/cranelift/filetests/filetests/runtests/simd-isub.clif new file mode 100644 index 000000000000..09669041f037 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-isub.clif @@ -0,0 +1,44 @@ +test interpret +test run +target aarch64 +target s390x +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + + +function %isub_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0:i8x16, v1:i8x16): + v2 = isub v0, v1 + return v2 +} +; run: %isub_i8x16([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15] +; run: %isub_i8x16([2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]) == [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3] + + +function %isub_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0:i16x8, v1:i16x8): + v2 = isub v0, v1 + return v2 +} +; run: %isub_i16x8([1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8]) == [0 -1 -2 -3 -4 -5 -6 -7] +; run: %isub_i16x8([2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1]) == [3 3 3 3 3 3 3 3] + + +function %isub_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0:i32x4, v1:i32x4): + v2 = isub v0, v1 + return v2 +} +; run: %isub_i32x4([1 1 1 1], [1 2 3 4]) == [0 -1 -2 -3] +; run: %isub_i32x4([2 2 2 2], [-1 -1 -1 -1]) == [3 3 3 3] + + +function %isub_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0:i64x2, v1:i64x2): + v2 = isub v0, v1 + return v2 +} +; run: %isub_i64x2([1 1], [1 2]) == [0 -1] +; run: %isub_i64x2([2 2], [-1 -1]) == [3 3] diff --git a/cranelift/filetests/filetests/runtests/simd-smulhi.clif b/cranelift/filetests/filetests/runtests/simd-smulhi.clif index 5fc0445bb496..370077eeb7e7 100644 --- a/cranelift/filetests/filetests/runtests/simd-smulhi.clif +++ b/cranelift/filetests/filetests/runtests/simd-smulhi.clif @@ -1,4 +1,6 @@ test interpret +test run +target riscv64 has_v ; The AArch64 and x86_64 backends only support scalar values. function %smulhi_i8x16(i8x16, i8x16) -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-umulhi.clif b/cranelift/filetests/filetests/runtests/simd-umulhi.clif index b383bf5082b7..3574aa5b8d83 100644 --- a/cranelift/filetests/filetests/runtests/simd-umulhi.clif +++ b/cranelift/filetests/filetests/runtests/simd-umulhi.clif @@ -1,4 +1,6 @@ test interpret +test run +target riscv64 has_v ; x86_64 only supports `i16`, `i32`, and `i64` function %umulhi_i8(i8, i8) -> i8 {