Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aarch64: Add support for load+extends patterns #8774

Merged
merged 1 commit into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1139,6 +1139,17 @@
(u8_from_uimm8 lane)))))
(value_regs (mov_from_vec (put_in_reg vec) lane (lane_size in)) (imm $I64 (ImmExtend.Zero) 0)))

;; Zero extensions from a load can be encoded in the load itself
;; (`ldrb`/`ldrh`/`ldr w` zero-extend into the full register), so instead of
;; emitting a load followed by a separate extend, sink the load into this
;; lowering and emit a single zero-extending load.
;; NOTE(review): this assumes `is_sinkable_inst` only matches when the load
;; may legally be merged (single use, no intervening side effects) — confirm
;; against the extern constructor's definition.
(rule (lower (has_type (fits_in_64 _) (uextend x @ (has_type in_ty (load flags address offset)))))
(if-let inst (is_sinkable_inst x))
;; Mark the load as sunk so it is not also emitted standalone.
(let ((_ Unit (sink_inst inst)))
(aarch64_uload in_ty (amode in_ty address offset) flags)))

;; Select the zero-extending load instruction matching the type of the
;; value being loaded ($I8 -> ldrb, $I16 -> ldrh, $I32 -> 32-bit ldr).
;; There is intentionally no $I64 case: a 64-bit load needs no extension,
;; and `fits_in_64` on the result combined with a strictly narrower source
;; means this helper is only reached for 8/16/32-bit loads.
(decl aarch64_uload (Type AMode MemFlags) Reg)
(rule (aarch64_uload $I8 amode flags) (aarch64_uload8 amode flags))
(rule (aarch64_uload $I16 amode flags) (aarch64_uload16 amode flags))
(rule (aarch64_uload $I32 amode flags) (aarch64_uload32 amode flags))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; General rule for extending input to an output which fits in a single
Expand Down Expand Up @@ -1187,6 +1198,17 @@
(hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
(value_regs lo hi)))

;; Signed extensions from a load can be encoded in the load itself
;; (`ldrsb`/`ldrsh`/`ldrsw` sign-extend as they load), so sink the load into
;; this lowering and emit a single sign-extending load rather than a load
;; plus a separate `sxtb`/`sxth`/`sxtw`.
;; NOTE(review): mirrors the `uextend` rule above — same assumption that
;; `is_sinkable_inst` guarantees the load is safe to merge; confirm.
(rule (lower (has_type (fits_in_64 _) (sextend x @ (has_type in_ty (load flags address offset)))))
(if-let inst (is_sinkable_inst x))
;; Mark the load as sunk so it is not also emitted standalone.
(let ((_ Unit (sink_inst inst)))
(aarch64_sload in_ty (amode in_ty address offset) flags)))

;; Select the sign-extending load instruction matching the type of the
;; value being loaded ($I8 -> ldrsb, $I16 -> ldrsh, $I32 -> ldrsw).
;; As with `aarch64_uload`, there is no $I64 case because a 64-bit load
;; requires no extension.
(decl aarch64_sload (Type AMode MemFlags) Reg)
(rule (aarch64_sload $I8 amode flags) (aarch64_sload8 amode flags))
(rule (aarch64_sload $I16 amode flags) (aarch64_sload16 amode flags))
(rule (aarch64_sload $I32 amode flags) (aarch64_sload32 amode flags))

;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Base case using `orn` between two registers.
Expand Down
208 changes: 208 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/load-extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
test compile precise-output
set unwind_info=false
target aarch64

;; Checks that a `load` followed by `uextend` lowers to a single
;; zero-extending load (ldrb/ldrh/32-bit ldr) with no separate extend
;; instruction. The `; VCode:` / `; Disassembled:` comments below each
;; function are `precise-output` expected output and must match exactly.

function %load_uextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i16 v1
return v2
}

; VCode:
; block0:
; ldrb w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0] ; trap: heap_oob
; ret

function %load_uextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrb w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0] ; trap: heap_oob
; ret

function %load_uextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrb w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0] ; trap: heap_oob
; ret

function %load_uextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrh w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0] ; trap: heap_oob
; ret

function %load_uextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrh w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0] ; trap: heap_oob
; ret

function %load_uextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; ldr w0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0] ; trap: heap_oob
; ret

;; Checks that a `load` followed by `sextend` lowers to a single
;; sign-extending load (ldrsb/ldrsh/ldrsw). Note the expected output shows
;; the x-form destination register (e.g. `ldrsb x0`) even for i16/i32
;; results — the instruction sign-extends into the full 64-bit register.

function %load_sextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i16 v1
return v2
}

; VCode:
; block0:
; ldrsb x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsb x0, [x0] ; trap: heap_oob
; ret

function %load_sextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrsb x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsb x0, [x0] ; trap: heap_oob
; ret

function %load_sextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrsb x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsb x0, [x0] ; trap: heap_oob
; ret

function %load_sextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; ldrsh x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsh x0, [x0] ; trap: heap_oob
; ret

function %load_sextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrsh x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsh x0, [x0] ; trap: heap_oob
; ret

function %load_sextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; ldrsw x0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsw x0, [x0] ; trap: heap_oob
; ret