Skip to content

Commit

Permalink
x64: Add support for load sinking in extend instructions (#8777)
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 authored Jun 11, 2024
1 parent bd4cfd7 commit 5158009
Show file tree
Hide file tree
Showing 2 changed files with 332 additions and 6 deletions.
22 changes: 16 additions & 6 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2155,29 +2155,39 @@
(decl extend_to_gpr (Value Type ExtendKind) Gpr)

;; If the value is already of the requested type, no extending is necessary.
(rule 2 (extend_to_gpr val @ (value_type ty) ty _kind)
(rule 3 (extend_to_gpr val @ (value_type ty) ty _kind)
val)

;; I32 -> I64 with op that produces a zero-extended value in a register.
;;
;; As an x64-specific pattern-matching opportunity: all 32-bit ALU
;; opcodes zero-extend their result into the upper 32 bits of the
;; destination, so in this case we can skip the zero-extending move entirely.
(rule 1 (extend_to_gpr src @ (value_type $I32) $I64 (ExtendKind.Zero))
(rule 2 (extend_to_gpr src @ (value_type $I32) $I64 (ExtendKind.Zero))
(if-let $true (value32_zeros_upper32 src))
(add_range_fact src 64 0 0xffff_ffff))

(rule (extend_to_gpr (and val (value_type from_ty))
to_ty
kind)
;; Both extend instructions are guaranteed to load exactly the source type's size.
;; So we can use `sinkable_load_exact` here to sink loads for small types (<= 16 bits).
(rule 1 (extend_to_gpr (and (sinkable_load_exact val) (value_type from_ty)) to_ty kind)
(extend_to_gpr_types val from_ty to_ty kind))

;; Otherwise emit the extend from a Gpr to a Gpr.
(rule (extend_to_gpr (and val (value_type from_ty)) to_ty kind)
(extend_to_gpr_types val from_ty to_ty kind))

;; Calculates the correct extension mode for an extend between `from_ty` and `to_ty`.
(decl extend_to_gpr_types (GprMem Type Type ExtendKind) Gpr)
(rule (extend_to_gpr_types val from_ty to_ty kind)
(let ((from_bits u16 (ty_bits_u16 from_ty))
;; Use `operand_size_of_type_32_64` so that we clamp the output to 32-
;; or 64-bit width types.
(to_bits u16 (operand_size_bits (operand_size_of_type_32_64 to_ty))))
(extend kind
to_ty
(ext_mode from_bits to_bits)
(put_in_gpr_mem val))))
val)))


;; Do a sign or zero extension of the given `GprMem`.
(decl extend (ExtendKind Type ExtMode GprMem) Gpr)
Expand Down
316 changes: 316 additions & 0 deletions cranelift/filetests/filetests/isa/x64/load-extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
test compile precise-output
set unwind_info=false
target x86_64

function %load_uextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i16 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzbl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_uextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzbl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_uextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbq 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzbq (%rdi), %rax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_uextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzwl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzwl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_uextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzwq 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzwq (%rdi), %rax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_uextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_sextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i16 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsbl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsbl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_sextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsbl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsbl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_sextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsbq 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsbq (%rdi), %rax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_sextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movswl 0(%rdi), %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movswl (%rdi), %eax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_sextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movswq 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movswq (%rdi), %rax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %load_sextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movslq 0(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movslq (%rdi), %rax ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

0 comments on commit 5158009

Please sign in to comment.