From 1df446b790abe27ebb4e510a7b3578a1f3efa4d8 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 25 Apr 2023 15:50:00 +0100 Subject: [PATCH] riscv64: Support non 128bit vector sizes (#6266) * riscv64: Add `Zvl` extensions * riscv64: Allow lowering SIMD operations that fit in a vector register * riscv64: Support non 128bit vector sizes * riscv64: Add Zvl Presets * riscv64: Precompute `min_vec_reg_size` --- cranelift/codegen/meta/src/cdsl/mod.rs | 2 +- cranelift/codegen/meta/src/gen_types.rs | 2 +- cranelift/codegen/meta/src/isa/riscv64.rs | 43 +++++++++ cranelift/codegen/src/isa/riscv64/inst/mod.rs | 20 +++- .../codegen/src/isa/riscv64/inst_vector.isle | 8 ++ cranelift/codegen/src/isa/riscv64/lower.isle | 15 ++- .../codegen/src/isa/riscv64/lower/isle.rs | 96 +++++++++++++++---- cranelift/codegen/src/isle_prelude.rs | 5 + cranelift/codegen/src/prelude.isle | 5 +- .../filetests/isa/riscv64/simd-iadd-big.clif | 40 ++++++++ .../isa/riscv64/simd-iadd-small.clif | 56 +++++++++++ .../filetests/runtests/simd-iadd-small.clif | 31 ++++++ 12 files changed, 291 insertions(+), 32 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-iadd-small.clif diff --git a/cranelift/codegen/meta/src/cdsl/mod.rs b/cranelift/codegen/meta/src/cdsl/mod.rs index 565783ad1680..4bde7d3e4a1a 100644 --- a/cranelift/codegen/meta/src/cdsl/mod.rs +++ b/cranelift/codegen/meta/src/cdsl/mod.rs @@ -28,7 +28,7 @@ macro_rules! 
preset { () => { vec![] }; - ($($x:ident)&&*) => { + ($($x:tt)&&*) => { { let mut v = Vec::new(); $( diff --git a/cranelift/codegen/meta/src/gen_types.rs b/cranelift/codegen/meta/src/gen_types.rs index f83638fd7f0d..7c7cacda31b0 100644 --- a/cranelift/codegen/meta/src/gen_types.rs +++ b/cranelift/codegen/meta/src/gen_types.rs @@ -60,7 +60,7 @@ fn emit_types(fmt: &mut srcgen::Formatter) { // Emit vector definitions for common SIMD sizes. // Emit dynamic vector definitions. - for vec_size in &[64_u64, 128, 256, 512] { + for vec_size in &[16_u64, 32, 64, 128, 256, 512] { emit_vectors(*vec_size, fmt); emit_dynamic_vectors(*vec_size, fmt); } diff --git a/cranelift/codegen/meta/src/isa/riscv64.rs b/cranelift/codegen/meta/src/isa/riscv64.rs index 3e0d88302588..bf825bb2baf5 100644 --- a/cranelift/codegen/meta/src/isa/riscv64.rs +++ b/cranelift/codegen/meta/src/isa/riscv64.rs @@ -3,6 +3,30 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; use crate::shared::Definitions as SharedDefinitions; +macro_rules! 
define_zvl_ext { + (DEF: $settings:expr, $size:expr) => {{ + let name = concat!("has_zvl", $size, "b"); + let desc = concat!("has extension Zvl", $size, "b?"); + let comment = concat!( + "Zvl", + $size, + "b: Vector register has a minimum of ", + $size, + " bits" + ); + $settings.add_bool(&name, &desc, &comment, false) + }}; + ($settings:expr, $size:expr $(, $implies:expr)*) => {{ + let has_feature = define_zvl_ext!(DEF: $settings, $size); + + let name = concat!("zvl", $size, "b"); + let desc = concat!("Has a vector register size of at least ", $size, " bits"); + + let preset = $settings.add_preset(&name, &desc, preset!(has_feature $( && $implies )*)); + (has_feature, preset) + }}; +} + fn define_settings(_shared: &SettingGroup) -> SettingGroup { let mut setting = SettingGroupBuilder::new("riscv64"); @@ -56,6 +80,25 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { false, ); + // Zvl*: Minimum Vector Length Standard Extensions + // These extensions specify the minimum number of bits in a vector register. + // Since it is a minimum, Zvl64b implies Zvl32b, Zvl128b implies Zvl64b, etc. + // The V extension supports a maximum of 64K bits in a single register.
+ // + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#181-zvl-minimum-vector-length-standard-extensions + let (_, zvl32b) = define_zvl_ext!(setting, 32); + let (_, zvl64b) = define_zvl_ext!(setting, 64, zvl32b); + let (_, zvl128b) = define_zvl_ext!(setting, 128, zvl64b); + let (_, zvl256b) = define_zvl_ext!(setting, 256, zvl128b); + let (_, zvl512b) = define_zvl_ext!(setting, 512, zvl256b); + let (_, zvl1024b) = define_zvl_ext!(setting, 1024, zvl512b); + let (_, zvl2048b) = define_zvl_ext!(setting, 2048, zvl1024b); + let (_, zvl4096b) = define_zvl_ext!(setting, 4096, zvl2048b); + let (_, zvl8192b) = define_zvl_ext!(setting, 8192, zvl4096b); + let (_, zvl16384b) = define_zvl_ext!(setting, 16384, zvl8192b); + let (_, zvl32768b) = define_zvl_ext!(setting, 32768, zvl16384b); + let (_, _zvl65536b) = define_zvl_ext!(setting, 65536, zvl32768b); + setting.build() } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index aa4e670ec6e6..c89843c2058f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -761,7 +761,25 @@ impl MachInst for Inst { F32 => Ok((&[RegClass::Float], &[F32])), F64 => Ok((&[RegClass::Float], &[F64])), I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), - _ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])), + _ if ty.is_vector() => { + debug_assert!(ty.bits() <= 512); + + // Here we only need to return a SIMD type with the same size as `ty`. + // We use these types for spills and reloads, so prefer types with lanes <= 31 + // since that fits in the immediate field of `vsetivli`. 
+ const SIMD_TYPES: [[Type; 1]; 6] = [ + [types::I8X2], + [types::I8X4], + [types::I8X8], + [types::I8X16], + [types::I16X16], + [types::I32X16], + ]; + let idx = (ty.bytes().ilog2() - 1) as usize; + let ty = &SIMD_TYPES[idx][..]; + + Ok((&[RegClass::Float], ty)) + } _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 7d56576e7c88..729cbcf7545d 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -99,6 +99,14 @@ (if-let $I64 (lane_type ty)) (VecElementWidth.E64)) +(decl pure min_vec_reg_size () u64) +(extern constructor min_vec_reg_size min_vec_reg_size) + +;; An extractor that matches any type that is known to fit in a single vector +;; register. +(decl ty_vec_fits_in_register (Type) Type) +(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register) + ;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; As noted in the RISC-V Vector Extension Specification, rs2 is the first diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 4df78191d48b..0a245f025a2d 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -28,14 +28,14 @@ ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Base case, simply adding things in registers. -(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y))) +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y))) (rv_add x y)) ;; Special cases for when one operand is an immediate that fits in 12 bits. 
-(rule 1 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) +(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y)))) (alu_rr_imm12 (select_addi ty) x y)) -(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) +(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y))) (alu_rr_imm12 (select_addi ty) y x)) ;; Special case when one of the operands is uextended @@ -98,8 +98,7 @@ (value_regs low high))) ;; SIMD Vectors -(rule 8 (lower (has_type (ty_vec128_int ty) (iadd x y))) - (if-let $true (has_v)) +(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y))) (rv_vadd_vv x y ty)) ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; @@ -815,8 +814,7 @@ (gen_load_128 p offset flags)) (rule 2 - (lower (has_type (ty_vec128_int ty) (load flags p @ (value_type (ty_addr64 _)) offset))) - (if-let $true (has_v)) + (lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset))) (let ((eew VecElementWidth (element_width_from_type ty))) (vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags ty))) @@ -845,8 +843,7 @@ (gen_store_128 p offset flags x)) (rule 2 - (lower (store flags x @ (value_type (ty_vec128_int ty)) p @ (value_type (ty_addr64 _)) offset)) - (if-let $true (has_v)) + (lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset)) (let ((eew VecElementWidth (element_width_from_type ty))) (vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags ty))) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 0dbcf937e25a..1f35867c0711 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -33,23 +33,67 @@ type VecMachLabel = Vec; type VecArgPair = Vec; use crate::machinst::valueregs; -/// The main entry point for lowering with ISLE. 
-pub(crate) fn lower( - lower_ctx: &mut Lower, - backend: &Riscv64Backend, - inst: Inst, -) -> Option { - // TODO: reuse the ISLE context across lowerings so we can reuse its - // internal heap allocations. - let mut isle_ctx = IsleContext { lower_ctx, backend }; - generated_code::constructor_lower(&mut isle_ctx, inst) +pub(crate) struct RV64IsleContext<'a, 'b, I, B> +where + I: VCodeInst, + B: LowerBackend, +{ + pub lower_ctx: &'a mut Lower<'b, I>, + pub backend: &'a B, + /// Precalculated value for the minimum vector register size. Will be 0 if + /// vectors are not supported. + min_vec_reg_size: u64, } -impl IsleContext<'_, '_, MInst, Riscv64Backend> { +impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> { isle_prelude_method_helpers!(Riscv64ABICaller); + + fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self { + Self { + lower_ctx, + backend, + min_vec_reg_size: Self::compute_min_vec_reg_size(backend), + } + } + + fn compute_min_vec_reg_size(backend: &Riscv64Backend) -> u64 { + let flags = &backend.isa_flags; + let entries = [ + (flags.has_zvl65536b(), 65536), + (flags.has_zvl32768b(), 32768), + (flags.has_zvl16384b(), 16384), + (flags.has_zvl8192b(), 8192), + (flags.has_zvl4096b(), 4096), + (flags.has_zvl2048b(), 2048), + (flags.has_zvl1024b(), 1024), + (flags.has_zvl512b(), 512), + (flags.has_zvl256b(), 256), + // In order to claim the Application Profile V extension, a minimum + // register size of 128 is required. i.e. V implies Zvl128b.
+ (flags.has_v(), 128), + (flags.has_zvl128b(), 128), + (flags.has_zvl64b(), 64), + (flags.has_zvl32b(), 32), + ]; + + for (has_flag, size) in entries.into_iter() { + if has_flag { + return size; + } + } + + return 0; + } + + #[inline] + fn emit_list(&mut self, list: &SmallInstVec) { + for i in list { + self.lower_ctx.emit(i.clone()); + } + } } -impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { +impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> { isle_lower_prelude_methods!(); isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller); @@ -437,17 +481,33 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { fn vstate_from_type(&mut self, ty: Type) -> VState { VState::from_type(ty) } -} -impl IsleContext<'_, '_, MInst, Riscv64Backend> { + fn min_vec_reg_size(&mut self) -> u64 { + self.min_vec_reg_size + } + #[inline] - fn emit_list(&mut self, list: &SmallInstVec) { - for i in list { - self.lower_ctx.emit(i.clone()); + fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option { + if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() { + Some(ty) + } else { + None } } } +/// The main entry point for lowering with ISLE. +pub(crate) fn lower( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + inst: Inst, +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower(&mut isle_ctx, inst) +} + /// The main entry point for branch lowering with ISLE. pub(crate) fn lower_branch( lower_ctx: &mut Lower, @@ -457,7 +517,7 @@ pub(crate) fn lower_branch( ) -> Option<()> { // TODO: reuse the ISLE context across lowerings so we can reuse its // internal heap allocations. 
- let mut isle_ctx = IsleContext { lower_ctx, backend }; + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec()) } diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index d64973ce5b42..6aac0c5b6a27 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -303,6 +303,11 @@ macro_rules! isle_common_prelude_methods { } } + #[inline] + fn ty_int_ref_scalar_64_extract(&mut self, ty: Type) -> Option { + self.ty_int_ref_scalar_64(ty) + } + #[inline] fn ty_32(&mut self, ty: Type) -> Option { if ty.bits() == 32 { diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index ad703ed8fe87..fcebfd5d5782 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -340,10 +340,11 @@ (decl ty_64 (Type) Type) (extern extractor ty_64 ty_64) -;; A pure constructor that only matches scalar integers, and references that can -;; fit in 64 bits. +;; A pure constructor/extractor that only matches scalar integers, and +;; references that can fit in 64 bits. (decl pure partial ty_int_ref_scalar_64 (Type) Type) (extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64) +(extern extractor ty_int_ref_scalar_64 ty_int_ref_scalar_64_extract) ;; An extractor that matches 32- and 64-bit types only. 
(decl ty_32_or_64 (Type) Type) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif new file mode 100644 index 000000000000..74b45c9ed03d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-big.clif @@ -0,0 +1,40 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v has_zvl2048b + + + +function %iadd_i64x4(i64x4, i64x4) -> i64x4 { +block0(v0:i64x4, v1:i64x4): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; vadd.vv v10,v11,v10 #avl=4, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x82, 0xcd +; .byte 0x57, 0x05, 0xb5, 0x02 +; ret + +function %iadd_i64x8(i64x8, i64x8) -> i64x8 { +block0(v0:i64x8, v1:i64x8): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; vadd.vv v10,v11,v10 #avl=8, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcd +; .byte 0x57, 0x05, 0xb5, 0x02 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif new file mode 100644 index 000000000000..efeccdf1b50c --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd-small.clif @@ -0,0 +1,56 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %iadd_i8x8(i8x8, i8x8) -> i8x8 { +block0(v0:i8x8, v1:i8x8): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; vadd.vv v10,v11,v10 #avl=8, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x04, 0xcc +; .byte 0x57, 0x05, 0xb5, 0x02 +; ret + +function %iadd_i16x4(i16x4, i16x4) -> i16x4 { +block0(v0:i16x4, v1:i16x4): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; vadd.vv v10,v11,v10 #avl=4, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x82, 0xcc +; .byte 
0x57, 0x05, 0xb5, 0x02 +; ret + +function %iadd_i32x2(i32x2, i32x2) -> i32x2 { +block0(v0:i32x2, v1:i32x2): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; vadd.vv v10,v11,v10 #avl=2, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x01, 0xcd +; .byte 0x57, 0x05, 0xb5, 0x02 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-small.clif b/cranelift/filetests/filetests/runtests/simd-iadd-small.clif new file mode 100644 index 000000000000..cfbed6b1a626 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-iadd-small.clif @@ -0,0 +1,31 @@ +test interpret +test run +target riscv64 has_v + +;; We only test 64bit values here since the interpreter does not support anything smaller. + +function %iadd_i8x8(i8x8, i8x8) -> i8x8 { +block0(v0:i8x8, v1:i8x8): + v2 = iadd v0, v1 + return v2 +} +; run: %iadd_i8x8([1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8]) == [2 3 4 5 6 7 8 9] +; run: %iadd_i8x8([2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1]) == [1 1 1 1 1 1 1 1] + + +function %iadd_i16x4(i16x4, i16x4) -> i16x4 { +block0(v0:i16x4, v1:i16x4): + v2 = iadd v0, v1 + return v2 +} +; run: %iadd_i16x4([1 1 1 1], [1 2 3 4]) == [2 3 4 5] +; run: %iadd_i16x4([2 2 2 2], [-1 -1 -1 -1]) == [1 1 1 1] + + +function %iadd_i32x2(i32x2, i32x2) -> i32x2 { +block0(v0:i32x2, v1:i32x2): + v2 = iadd v0, v1 + return v2 +} +; run: %iadd_i32x2([1 1], [1 2]) == [2 3] +; run: %iadd_i32x2([2 2], [-1 -1]) == [1 1]