Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Implement a few SIMD arithmetic ops #6268

Merged
merged 8 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,8 @@
(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
(vs1 Reg)
(vs2 Reg)
(vs1 Reg)
(vstate VState))

(VecSetState
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/inst/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub fn encode_valu(
) -> u32 {
let funct6 = funct6 & 0b111111;
let vm = vm & 0b1;
let funct7 = (funct6 << 6) | vm;
let funct7 = (funct6 << 1) | vm;
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
}

Expand Down
29 changes: 22 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,27 +214,42 @@ impl fmt::Display for VState {

impl VecAluOpRRR {
pub fn opcode(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0x57,
}
// Vector Opcode
0x57
}
pub fn funct3(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0b000,
// OPIVV
VecAluOpRRR::Vadd
| VecAluOpRRR::Vsub
| VecAluOpRRR::Vand
| VecAluOpRRR::Vor
| VecAluOpRRR::Vxor => 0b000,
// OPIMV
VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010,
}
}
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
VecAluOpRRR::Vadd => 0b000000,
VecAluOpRRR::Vsub => 0b000010,
VecAluOpRRR::Vmul => 0b100101,
VecAluOpRRR::Vmulh => 0b100111,
VecAluOpRRR::Vmulhu => 0b100100,
VecAluOpRRR::Vand => 0b001001,
VecAluOpRRR::Vor => 0b001010,
VecAluOpRRR::Vxor => 0b001011,
}
}
}

impl fmt::Display for VecAluOpRRR {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
}
let mut s = format!("{self:?}");
s.make_ascii_lowercase();
s.push_str(".vv");
f.write_str(&s)
}
}

Expand Down
42 changes: 42 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@
;; Register to Register ALU Ops
(type VecAluOpRRR (enum
(Vadd)
(Vsub)
(Vmul)
(Vmulh)
(Vmulhu)
(Vand)
(Vor)
(Vxor)
))


Expand Down Expand Up @@ -138,3 +145,38 @@
(decl rv_vadd_vv (Reg Reg VState) Reg)
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))

;; Helper for emitting the `vsub.vv` instruction.
(decl rv_vsub_vv (Reg Reg VState) Reg)
(rule (rv_vsub_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate))

;; Helper for emitting the `vmul.vv` instruction.
(decl rv_vmul_vv (Reg Reg VState) Reg)
(rule (rv_vmul_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate))

;; Helper for emitting the `vmulh.vv` instruction.
(decl rv_vmulh_vv (Reg Reg VState) Reg)
(rule (rv_vmulh_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate))

;; Helper for emitting the `vmulhu.vv` instruction.
(decl rv_vmulhu_vv (Reg Reg VState) Reg)
(rule (rv_vmulhu_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate))

;; Helper for emitting the `vand.vv` instruction.
(decl rv_vand_vv (Reg Reg VState) Reg)
(rule (rv_vand_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate))

;; Helper for emitting the `vor.vv` instruction.
(decl rv_vor_vv (Reg Reg VState) Reg)
(rule (rv_vor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate))

;; Helper for emitting the `vxor.vv` instruction.
(decl rv_vxor_vv (Reg Reg VState) Reg)
(rule (rv_vxor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate))
49 changes: 35 additions & 14 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,19 @@
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply subtracting things in registers.

(rule -2 (lower (has_type (fits_in_64 ty) (isub x y)))
(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y)))
(rv_sub x y))

(rule -1 (lower (has_type (fits_in_32 ty) (isub x y)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y)))
(rv_subw x y))

(rule (lower (has_type $I128 (isub x y)))
(rule 2 (lower (has_type $I128 (isub x y)))
(i128_sub x y))

;; SIMD Vectors
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y)))
(rv_vsub_vv x y ty))

;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `i64` and smaller.
Expand All @@ -129,21 +133,14 @@

;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule -2 (lower (has_type (fits_in_64 ty) (imul x y)))
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y)))
(rv_mul x y))
(rule -1 (lower (has_type (fits_in_32 ty) (imul x y)))
(rv_mulw x y))

;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))

;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y)))
(rv_mulw x y))

;; for I128
(rule (lower (has_type $I128 (imul x y)))
(rule 2 (lower (has_type $I128 (imul x y)))
(let
((x_regs ValueRegs x)
(x_lo Reg (value_regs_get x_regs 0))
Expand All @@ -169,6 +166,22 @@
(dst_lo Reg (madd x_lo y_lo (zero_reg))))
(value_regs dst_lo dst_hi)))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y)))
(rv_vmul_vv x y ty))

;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y)))
(rv_vmulh_vv x y ty))

;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y)))
(rv_vmulhu_vv x y ty))

;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -277,6 +290,10 @@
(value_regs low high)))


(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (band x y)))
(rv_vand_vv x y ty))


;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bor x y)))
(gen_or ty x y))
Expand Down Expand Up @@ -320,6 +337,8 @@
(high Reg (rv_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))

(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bor x y)))
(rv_vor_vv x y ty))

;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
Expand All @@ -341,6 +360,8 @@
(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y)))
(rv_vxor_vv x y ty))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bnot x)))
Expand Down
73 changes: 73 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-band.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v


function %band_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

function %band_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

function %band_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

function %band_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = band v0, v1
return v2
}

; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

73 changes: 73 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-bor.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v


function %bor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

function %bor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

function %bor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

function %bor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bor v0, v1
return v2
}

; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

Loading