Skip to content

Commit

Permalink
Cranelift(x64): Fix lowering for store(bitop(x, load(addr)), addr)
Browse files Browse the repository at this point in the history
for bitops on floats (#9003)

* Cranelift(x64): Fix lowering for `store(bitop(x, load(addr)), addr)` for bitops on floats

x86-64 allows us to do these kinds of read-modify-write operations in one
instruction in general, however we also need to ensure that the non-memory
operand is in a GPR. Because Cranelift allows `b{and,or,xor}`s on floating point
types, that means we might need to insert a move from an XMM to a GPR.

Co-Authored-By: Jamey Sharp <jsharp@fastly.com>

* Match all GPR values in `put_in_gpr` and let assertion catch multi-reg values

Instead of backtracking.

---------

Co-authored-by: Jamey Sharp <jsharp@fastly.com>
  • Loading branch information
fitzgen and jameysharp authored Jul 24, 2024
1 parent bc3d612 commit 6f41aed
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 1 deletion.
14 changes: 13 additions & 1 deletion cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1629,11 +1629,23 @@

;; Put a value into a GPR.
;;
;; Asserts that the value goes into a GPR.
;; Moves the value into a GPR if it is a type that would naturally go into an
;; XMM register.
(decl put_in_gpr (Value) Gpr)

;; Case for when the value naturally lives in a GPR (its type's register
;; class is `RegisterClass.Gpr`): simply put it in a register and view it
;; as a `Gpr`.
(rule (put_in_gpr val)
(if-let (value_type ty) val)
(if-let (type_register_class (RegisterClass.Gpr _)) ty)
(gpr_new (put_in_reg val)))

;; Case for when the value naturally lives in an XMM register and we must
;; bitcast it from an XMM into a GPR. This happens e.g. for float operands
;; of `b{and,or,xor}` when the lowering needs the non-memory operand in a
;; GPR for a read-modify-write memory instruction.
(rule (put_in_gpr val)
(if-let (value_type ty) val)
(if-let (type_register_class (RegisterClass.Xmm)) ty)
(bitcast_xmm_to_gpr ty (xmm_new (put_in_reg val))))

;; Put a value into a `GprMem`.
;;
;; Asserts that the value goes into a GPR.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
test compile precise-output
target x86_64

; Regression test: `store(bor(x, load(p)), p)` where the non-memory operand
; is a float. The RMW lowering must first move the f32 out of its XMM
; register into a GPR (`movd %xmm0, %ecx`) before the single-instruction
; memory `orl` can be emitted.
function %bor0(i64, f32) {
block0(v0: i64, v1: f32):
v2 = load.f32 v0
v3 = bor v1, v2
store v3, v0
return
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %ecx
; orl %ecx, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %ecx
; orl %ecx, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

; Same as the `bor` case with the operand order swapped: the load is the
; first `bor` operand. The lowering must still route the f32 through a GPR
; (`movd`) and produce the same memory `orl`.
function %bor1(i64, f32) {
block0(v0: i64, v1: f32):
v2 = load.f32 v0
v3 = bor v2, v1
store v3, v0
return
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %ecx
; orl %ecx, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %ecx
; orl %ecx, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

; Regression test: `store(band(x, load(p)), p)` with a float operand. The
; f32 is moved XMM -> GPR via `movd` so the RMW memory `andl` can be used.
function %band0(i64, f32) {
block0(v0: i64, v1: f32):
v2 = load.f32 v0
v3 = band v1, v2
store v3, v0
return
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %ecx
; andl %ecx, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %ecx
; andl %ecx, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

; Same as the `band` case with the operand order swapped: the load is the
; first `band` operand; the expected lowering is unchanged (movd + andl).
function %band1(i64, f32) {
block0(v0: i64, v1: f32):
v2 = load.f32 v0
v3 = band v2, v1
store v3, v0
return
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %ecx
; andl %ecx, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %ecx
; andl %ecx, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

; Regression test: `store(bxor(x, load(p)), p)` with a float operand. The
; f32 is moved XMM -> GPR via `movd` so the RMW memory `xorl` can be used.
function %bxor0(i64, f32) {
block0(v0: i64, v1: f32):
v2 = load.f32 v0
v3 = bxor v1, v2
store v3, v0
return
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %ecx
; xorl %ecx, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %ecx
; xorl %ecx, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

; Same as the `bxor` case with the operand order swapped: the load is the
; first `bxor` operand; the expected lowering is unchanged (movd + xorl).
function %bxor1(i64, f32) {
block0(v0: i64, v1: f32):
v2 = load.f32 v0
v3 = bxor v2, v1
store v3, v0
return
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %ecx
; xorl %ecx, 0(%rdi)
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %ecx
; xorl %ecx, (%rdi) ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

0 comments on commit 6f41aed

Please sign in to comment.