Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Add support for load+extend patterns #8765

Merged
merged 2 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2422,22 +2422,48 @@
(gen_stack_slot_amode ss combined_offset))


;; Helpers for sinkable loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; RISC-V doesn't really have sinkable loads. But the regular load instructions
;; sign / zero extend their results to 64 bits. So we can pretend they are
;; an extend instruction with a sinkable load. This allows us to have better
;; lowerings in these cases.

;; Extract a sinkable instruction from a value operand.
;;
;; Declared as an external extractor, so the matching logic is supplied by the
;; backend's Rust context rather than written in ISLE.
(decl sinkable_inst (Inst) Value)
(extern extractor sinkable_inst sinkable_inst)

;; Matches a sinkable load.
;;
;; Succeeds only when both sub-patterns match the same value: `load` supplies
;; the memory flags, address and offset, while `sinkable_inst` supplies the
;; producing instruction, with `ty` bound through `has_type`.
;;
;; The intended use is the `extend(load(..))` shape: the `uextend` / `sextend`
;; lowerings match this on their operand and emit a single extending load in
;; place of a separate load followed by an extend.
(decl sinkable_load (Inst Type MemFlags Value Offset32) Value)
(extractor (sinkable_load inst ty flags addr offset)
(and
(load flags addr offset)
(sinkable_inst (has_type ty inst))))
Comment on lines +2432 to +2441
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment here about the extend(load()) use case and the asterisk around "sinkable" that you added in the PR description? That seems like good context to have when reading this code.


;; Returns a canonical type for a LoadOP. We only return I64 or F64.
;;
;; The two float loads (`Fld`, `Flw`) map to $F64 at priority 1; every other
;; op falls through to the priority-0 wildcard rule and maps to $I64.
(decl load_op_reg_type (LoadOP) Type)
(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
(rule 0 (load_op_reg_type _) $I64)

;; Helper constructor to build a load instruction.
;;
;; Allocates a temporary destination register whose type is chosen by
;; `load_op_reg_type`, emits an `MInst.Load` into it with the given op, flags
;; and addressing mode, and returns that register.
(decl gen_load (AMode LoadOP MemFlags) Reg)
(rule (gen_load amode op flags)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags amode))))
dst))

;; Similar to `gen_load` but marks `Inst` as sunk at the current point.
;;
;; This is only useful for load ops that perform some additional computation
;; such as extending the loaded value.
;;
;; `sink_inst` is invoked first; the load itself is then built by `gen_load`
;; and its result register is returned.
(decl gen_sunk_load (Inst AMode LoadOP MemFlags) Reg)
(rule (gen_sunk_load inst amode op flags)
(let ((_ Unit (sink_inst inst)))
(gen_load amode op flags)))


;; Helper constructor to build a store instruction.
;;
;; This helper contains a special-case for zero constants stored to memory to
;; directly store the `zero` register to memory. See #7162 for some discussion
Expand Down
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1115,16 +1115,36 @@
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case (lowest priority): results of up to 64 bits go through `zext`.
(rule 0 (lower (has_type (fits_in_64 _) (uextend val)))
(zext val))

;; $I128 result: pairs `zext val` with a register holding the constant 0.
(rule 1 (lower (has_type $I128 (uextend val)))
(value_regs (zext val) (imm $I64 0)))

;; When the source of an `uextend` is a load, we can merge both ops.
;; This is the highest-priority rule of the three; the load op is chosen from
;; the type bound by `sinkable_load` via `uextend_load_op`, and the original
;; load is sunk by `gen_sunk_load`.
(rule 2 (lower (has_type (fits_in_64 _) (uextend (sinkable_load inst ty flags addr offset))))
(gen_sunk_load inst (amode addr offset) (uextend_load_op ty) flags))

;; Selects the load op used when an `uextend` is merged into its load:
;; $I8 -> Lbu, $I16 -> Lhu, $I32 -> Lwu. No rule is provided for other types.
(decl pure uextend_load_op (Type) LoadOP)
(rule (uextend_load_op $I8) (LoadOP.Lbu))
(rule (uextend_load_op $I16) (LoadOP.Lhu))
(rule (uextend_load_op $I32) (LoadOP.Lwu))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case (lowest priority): results of up to 64 bits go through `sext`.
;; NOTE(review): `in_ty` is bound here but not used on the right-hand side.
(rule 0 (lower (has_type (fits_in_64 _) (sextend val @ (value_type in_ty))))
(sext val))

;; $I128 result: the first register is `sext val`; the second is that same
;; value passed through `rv_srai` with an immediate of 63 (presumably
;; replicating the sign bit across the upper half -- confirm).
;; NOTE(review): `in_ty` is bound here but not used on the right-hand side.
(rule 1 (lower (has_type $I128 (sextend val @ (value_type in_ty))))
(let ((lo XReg (sext val)))
(value_regs lo (rv_srai lo (imm12_const 63)))))

;; When the source of an `sextend` is a load, we can merge both ops.
;; This is the highest-priority rule of the three; the load op is chosen from
;; the type bound by `sinkable_load` via `sextend_load_op`, and the original
;; load is sunk by `gen_sunk_load`.
(rule 2 (lower (has_type (fits_in_64 _) (sextend (sinkable_load inst ty flags addr offset))))
(gen_sunk_load inst (amode addr offset) (sextend_load_op ty) flags))

;; Selects the load op used when an `sextend` is merged into its load:
;; $I8 -> Lb, $I16 -> Lh, $I32 -> Lw. No rule is provided for other types.
(decl pure sextend_load_op (Type) LoadOP)
(rule (sextend_load_op $I8) (LoadOP.Lb))
(rule (sextend_load_op $I16) (LoadOP.Lh))
(rule (sextend_load_op $I32) (LoadOP.Lw))

;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_64 _) (popcnt x)))
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
_ => None,
}
}

/// Rust side of the ISLE `sinkable_inst` external extractor.
///
/// Forwards `val` to `is_sinkable_inst` and returns its result unchanged:
/// `Some(inst)` lets the ISLE pattern match, `None` makes it fail.
fn sinkable_inst(&mut self, val: Value) -> Option<Inst> {
self.is_sinkable_inst(val)
}

/// Returns the `LoadOP` for `ty`, as computed by `LoadOP::from_type`.
fn load_op(&mut self, ty: Type) -> LoadOP {
LoadOP::from_type(ty)
}
Expand Down
210 changes: 210 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/load-extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
test compile precise-output
set unwind_info=false
target riscv64

function %load_uextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i16 v1
return v2
}

; VCode:
; block0:
; lbu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lbu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; lbu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lbu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; lbu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lbu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; lhu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lhu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; lhu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lhu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; lwu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lwu a0, 0(a0) ; trap: heap_oob
; ret



function %load_sextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i16 v1
return v2
}

; VCode:
; block0:
; lb a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lb a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; lb a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lb a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; lb a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lb a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; lh a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lh a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; lh a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lh a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; lw a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lw a0, 0(a0) ; trap: heap_oob
; ret