From f52b35ab8a9b6576a50f1ed195ffe049c99f035f Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 10 Jun 2024 21:40:37 +0100 Subject: [PATCH 1/2] riscv64: Add support for `load+extend` patterns RISC-V doesen't have sinkable loads per se, but the regular load instructions support sign / zero extending the loaded values. We model those here as a sinkable load on the extend instruction. --- cranelift/codegen/src/isa/riscv64/inst.isle | 25 ++- cranelift/codegen/src/isa/riscv64/lower.isle | 20 ++ .../codegen/src/isa/riscv64/lower/isle.rs | 5 + .../filetests/isa/riscv64/load-extends.clif | 210 ++++++++++++++++++ 4 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/load-extends.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 250fce88c4fc..e539c3dd28fb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -2422,7 +2422,18 @@ (gen_stack_slot_amode ss combined_offset)) +;; Helpers for sinkable loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Extract a sinkable instruction from a value operand. +(decl sinkable_inst (Inst) Value) +(extern extractor sinkable_inst sinkable_inst) + +;; Matches a sinkable load. +(decl sinkable_load (Inst Type MemFlags Value Offset32) Value) +(extractor (sinkable_load inst ty flags addr offset) + (and + (load flags addr offset) + (sinkable_inst (has_type ty inst)))) ;; Returns a canonical type for a LoadOP. We only return I64 or F64. (decl load_op_reg_type (LoadOP) Type) @@ -2430,14 +2441,24 @@ (rule 1 (load_op_reg_type (LoadOP.Flw)) $F64) (rule 0 (load_op_reg_type _) $I64) -;; helper function to load from memory. +;; Helper constructor to build a load instruction. (decl gen_load (AMode LoadOP MemFlags) Reg) (rule (gen_load amode op flags) (let ((dst WritableReg (temp_writable_reg (load_op_reg_type op))) (_ Unit (emit (MInst.Load dst op flags amode)))) dst)) -;; helper function to store to memory. +;; Similar to `gen_load` but marks `Inst` as sunk at the current point. +;; +;; This is only useful for load op's that perform some additional computation +;; such as extending the loaded value. +(decl gen_sunk_load (Inst AMode LoadOP MemFlags) Reg) +(rule (gen_sunk_load inst amode op flags) + (let ((_ Unit (sink_inst inst))) + (gen_load amode op flags))) + + +;; Helper constructor to build a store instruction. ;; ;; This helper contains a special-case for zero constants stored to memory to ;; directly store the `zero` register to memory. See #7162 for some discussion diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 81aba31b86d9..2e10a253a351 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1115,16 +1115,36 @@ ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (fits_in_64 _) (uextend val))) (zext val)) + (rule 1 (lower (has_type $I128 (uextend val))) (value_regs (zext val) (imm $I64 0))) +;; When the source of an `uextend` is a load, we can merge both ops +(rule 2 (lower (has_type (fits_in_64 _) (uextend (sinkable_load inst ty flags addr offset)))) + (gen_sunk_load inst (amode addr offset) (uextend_load_op ty) flags)) + +(decl pure uextend_load_op (Type) LoadOP) +(rule (uextend_load_op $I8) (LoadOP.Lbu)) +(rule (uextend_load_op $I16) (LoadOP.Lhu)) +(rule (uextend_load_op $I32) (LoadOP.Lwu)) + ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (fits_in_64 _) (sextend val @ (value_type in_ty)))) (sext val)) + (rule 1 (lower (has_type $I128 (sextend val @ (value_type in_ty)))) (let ((lo XReg (sext val))) (value_regs lo (rv_srai lo (imm12_const 63))))) +;; When the source of an `sextend` is a load, we can merge both ops +(rule 2 (lower (has_type (fits_in_64 _) (sextend (sinkable_load inst ty flags addr offset)))) + (gen_sunk_load inst (amode addr offset) (sextend_load_op ty) flags)) + +(decl pure sextend_load_op (Type) LoadOP) +(rule (sextend_load_op $I8) (LoadOP.Lb)) +(rule (sextend_load_op $I16) (LoadOP.Lh)) +(rule (sextend_load_op $I32) (LoadOP.Lw)) + ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (fits_in_64 _) (popcnt x))) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 743220d0d3db..14b12e3a645b 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -450,6 +450,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> _ => None, } } + + fn sinkable_inst(&mut self, val: Value) -> Option { + self.is_sinkable_inst(val) + } + fn load_op(&mut self, ty: Type) -> LoadOP { LoadOP::from_type(ty) } diff --git a/cranelift/filetests/filetests/isa/riscv64/load-extends.clif b/cranelift/filetests/filetests/isa/riscv64/load-extends.clif new file mode 100644 index 000000000000..ffba7781174b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/load-extends.clif @@ -0,0 +1,210 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %load_uextend_i8_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i8 v0 + v2 = uextend.i16 v1 + return v2 +} + +; VCode: +; block0: +; lbu a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lbu a0, 0(a0) ; trap: heap_oob +; ret + +function %load_uextend_i8_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i8 v0 + v2 = uextend.i32 v1 + return v2 +} + +; VCode: +; block0: +; lbu a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lbu a0, 0(a0) ; trap: heap_oob +; ret + +function %load_uextend_i8_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i8 v0 + v2 = uextend.i64 v1 + return v2 +} + +; VCode: +; block0: +; lbu a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lbu a0, 0(a0) ; trap: heap_oob +; ret + +function %load_uextend_i16_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i16 v0 + v2 = uextend.i32 v1 + return v2 +} + +; VCode: +; block0: +; lhu a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lhu a0, 0(a0) ; trap: heap_oob +; ret + +function %load_uextend_i16_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i16 v0 + v2 = uextend.i64 v1 + return v2 +} + +; VCode: +; block0: +; lhu a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lhu a0, 0(a0) ; trap: heap_oob +; ret + +function %load_uextend_i32_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i32 v0 + v2 = uextend.i64 v1 + return v2 +} + +; VCode: +; block0: +; lwu a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lwu a0, 0(a0) ; trap: heap_oob +; ret + + + +function %load_sextend_i8_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i8 v0 + v2 = sextend.i16 v1 + return v2 +} + +; VCode: +; block0: +; lb a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lb a0, 0(a0) ; trap: heap_oob +; ret + +function %load_sextend_i8_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i8 v0 + v2 = sextend.i32 v1 + return v2 +} + +; VCode: +; block0: +; lb a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lb a0, 0(a0) ; trap: heap_oob +; ret + +function %load_sextend_i8_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i8 v0 + v2 = sextend.i64 v1 + return v2 +} + +; VCode: +; block0: +; lb a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lb a0, 0(a0) ; trap: heap_oob +; ret + +function %load_sextend_i16_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i16 v0 + v2 = sextend.i32 v1 + return v2 +} + +; VCode: +; block0: +; lh a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lh a0, 0(a0) ; trap: heap_oob +; ret + +function %load_sextend_i16_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i16 v0 + v2 = sextend.i64 v1 + return v2 +} + +; VCode: +; block0: +; lh a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lh a0, 0(a0) ; trap: heap_oob +; ret + +function %load_sextend_i32_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i32 v0 + v2 = sextend.i64 v1 + return v2 +} + +; VCode: +; block0: +; lw a0,0(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; lw a0, 0(a0) ; trap: heap_oob +; ret + From 50e8dd30a15cae08389a9a2ba50ecba18b1b79fc Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 11 Jun 2024 10:05:48 +0100 Subject: [PATCH 2/2] riscv64: Clarify sinkable loads on RISC-V --- cranelift/codegen/src/isa/riscv64/inst.isle | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index e539c3dd28fb..5dc9c998929f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -2424,6 +2424,11 @@ ;; Helpers for sinkable loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RISC-V doesen't really have sinkable loads. But the regular load instructions +;; sign / zero extend their results to 64 bits. So we can pretend they are +;; an extend instruction with a sinkable load. This allows us to have better +;; lowerings on these cases. + ;; Extract a sinkable instruction from a value operand. (decl sinkable_inst (Inst) Value) (extern extractor sinkable_inst sinkable_inst)