Skip to content

Commit

Permalink
riscv64: Implement a few SIMD arithmetic ops (bytecodealliance#6268)
Browse files Browse the repository at this point in the history
* riscv64: Swap order of `VecAluRRR` source registers

These were accidentally reversed relative to the order declared in the ISLE emit helper.

* riscv64: Add SIMD `isub`

* riscv64: Add SIMD `imul`

* riscv64: Add `{u,s}mulhi`

* riscv64: Add `b{and,or,xor}`

* cranelift: Move `imul.i8x16` runtest to separate file

It looks like x86 does not implement it.

* riscv64: Better formatting for `VecAluOpRRR`

* cranelift: Enable x86 SIMD tests with `has_sse41=false`
  • Loading branch information
afonso360 authored and eduardomourar committed Apr 28, 2023
1 parent 1df446b commit 00a7529
Show file tree
Hide file tree
Showing 25 changed files with 872 additions and 78 deletions.
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,8 @@
(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
(vs1 Reg)
(vs2 Reg)
(vs1 Reg)
(vstate VState))

(VecSetState
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/inst/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub fn encode_valu(
) -> u32 {
let funct6 = funct6 & 0b111111;
let vm = vm & 0b1;
let funct7 = (funct6 << 6) | vm;
let funct7 = (funct6 << 1) | vm;
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
}

Expand Down
29 changes: 22 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,27 +214,42 @@ impl fmt::Display for VState {

impl VecAluOpRRR {
pub fn opcode(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0x57,
}
// Vector Opcode
0x57
}
pub fn funct3(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0b000,
// OPIVV
VecAluOpRRR::Vadd
| VecAluOpRRR::Vsub
| VecAluOpRRR::Vand
| VecAluOpRRR::Vor
| VecAluOpRRR::Vxor => 0b000,
// OPIMV
VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010,
}
}
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
VecAluOpRRR::Vadd => 0b000000,
VecAluOpRRR::Vsub => 0b000010,
VecAluOpRRR::Vmul => 0b100101,
VecAluOpRRR::Vmulh => 0b100111,
VecAluOpRRR::Vmulhu => 0b100100,
VecAluOpRRR::Vand => 0b001001,
VecAluOpRRR::Vor => 0b001010,
VecAluOpRRR::Vxor => 0b001011,
}
}
}

impl fmt::Display for VecAluOpRRR {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
}
let mut s = format!("{self:?}");
s.make_ascii_lowercase();
s.push_str(".vv");
f.write_str(&s)
}
}

Expand Down
42 changes: 42 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@
;; Register to Register ALU Ops
(type VecAluOpRRR (enum
(Vadd)
(Vsub)
(Vmul)
(Vmulh)
(Vmulhu)
(Vand)
(Vor)
(Vxor)
))


Expand Down Expand Up @@ -138,3 +145,38 @@
(decl rv_vadd_vv (Reg Reg VState) Reg)
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))

;; Helper for emitting the `vsub.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vsub` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Operand order matters for subtraction: the vector `isub x y` lowering calls
;; this as `(rv_vsub_vv x y ty)`, so `vs2` carries the minuend and `vs1` the
;; subtrahend.
(decl rv_vsub_vv (Reg Reg VState) Reg)
(rule (rv_vsub_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate))

;; Helper for emitting the `vmul.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vmul` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Used by the vector `imul` lowering, which passes its operands as
;; `(rv_vmul_vv x y ty)`.
(decl rv_vmul_vv (Reg Reg VState) Reg)
(rule (rv_vmul_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate))

;; Helper for emitting the `vmulh.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vmulh` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Used by the vector `smulhi` lowering (the signed counterpart of
;; `rv_vmulhu_vv`), which passes its operands as `(rv_vmulh_vv x y ty)`.
(decl rv_vmulh_vv (Reg Reg VState) Reg)
(rule (rv_vmulh_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate))

;; Helper for emitting the `vmulhu.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vmulhu` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Used by the vector `umulhi` lowering (the unsigned counterpart of
;; `rv_vmulh_vv`), which passes its operands as `(rv_vmulhu_vv x y ty)`.
(decl rv_vmulhu_vv (Reg Reg VState) Reg)
(rule (rv_vmulhu_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate))

;; Helper for emitting the `vand.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vand` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Used by the vector `band` lowering, which passes its operands as
;; `(rv_vand_vv x y ty)`.
(decl rv_vand_vv (Reg Reg VState) Reg)
(rule (rv_vand_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate))

;; Helper for emitting the `vor.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vor` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Used by the vector `bor` lowering, which passes its operands as
;; `(rv_vor_vv x y ty)`.
(decl rv_vor_vv (Reg Reg VState) Reg)
(rule (rv_vor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate))

;; Helper for emitting the `vxor.vv` instruction.
;;
;; Takes the two source registers in `vs2`, `vs1` order plus a `VState`, and
;; forwards them unchanged to `vec_alu_rrr` with the `VecAluOpRRR.Vxor` op,
;; returning the `Reg` that `vec_alu_rrr` produces.
;;
;; Used by the vector `bxor` lowering, which passes its operands as
;; `(rv_vxor_vv x y ty)`.
(decl rv_vxor_vv (Reg Reg VState) Reg)
(rule (rv_vxor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate))
49 changes: 35 additions & 14 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,19 @@
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply subtracting things in registers.

(rule -2 (lower (has_type (fits_in_64 ty) (isub x y)))
(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y)))
(rv_sub x y))

(rule -1 (lower (has_type (fits_in_32 ty) (isub x y)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y)))
(rv_subw x y))

(rule (lower (has_type $I128 (isub x y)))
(rule 2 (lower (has_type $I128 (isub x y)))
(i128_sub x y))

;; SIMD Vectors
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y)))
(rv_vsub_vv x y ty))

;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.
Expand All @@ -129,21 +133,14 @@

;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule -2 (lower (has_type (fits_in_64 ty) (imul x y)))
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y)))
(rv_mul x y))
(rule -1 (lower (has_type (fits_in_32 ty) (imul x y)))
(rv_mulw x y))

;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))

;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y)))
(rv_mulw x y))

;; for I128
(rule (lower (has_type $I128 (imul x y)))
(rule 2 (lower (has_type $I128 (imul x y)))
(let
((x_regs ValueRegs x)
(x_lo Reg (value_regs_get x_regs 0))
Expand All @@ -169,6 +166,22 @@
(dst_lo Reg (madd x_lo y_lo (zero_reg))))
(value_regs dst_lo dst_hi)))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y)))
(rv_vmul_vv x y ty))

;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y)))
(rv_vmulh_vv x y ty))

;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y)))
(rv_vmulhu_vv x y ty))

;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -277,6 +290,10 @@
(value_regs low high)))


(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (band x y)))
(rv_vand_vv x y ty))


;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bor x y)))
(gen_or ty x y))
Expand Down Expand Up @@ -320,6 +337,8 @@
(high Reg (rv_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))

(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bor x y)))
(rv_vor_vv x y ty))

;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
Expand All @@ -341,6 +360,8 @@
(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y)))
(rv_vxor_vv x y ty))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bnot x)))
Expand Down
73 changes: 73 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-band.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v


function %band_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

function %band_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

function %band_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

function %band_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

73 changes: 73 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-bor.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v


function %bor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

function %bor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

function %bor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

function %bor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

Loading

0 comments on commit 00a7529

Please sign in to comment.