-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cranelift(x64): Fix lowering for
store(bitop(x, load(addr)), addr)
…
…for bitops on floats (#9003) * Cranelift(x64): Fix lowering for `store(bitop(x, load(addr)), addr)` for bitops on floats. In general, x86-64 allows us to do these kinds of read-modify-write operations in one instruction; however, we also need to ensure that the non-memory operand is in a GPR. Because Cranelift allows `b{and,or,xor}`s on floating-point types, that means we might need to insert a move from an XMM to a GPR. Co-Authored-By: Jamey Sharp <jsharp@fastly.com> * Match all GPR values in `put_in_gpr` and let the assertion catch multi-reg values instead of backtracking. --------- Co-authored-by: Jamey Sharp <jsharp@fastly.com>
- Loading branch information
1 parent
bc3d612
commit 6f41aed
Showing
2 changed files
with
190 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
177 changes: 177 additions & 0 deletions
177
cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
test compile precise-output | ||
target x86_64 | ||
|
||
;; %bor0: load an f32 from address v0, `bor` it with the f32 argument v1, and store the result back to v0. | ||
;; The loaded value (v2) is the SECOND operand of the `bor` here; %bor1 covers the other operand order. | ||
;; The expected output that follows uses `movd %xmm0, %ecx` and then a memory-destination `orl`. | ||
function %bor0(i64, f32) { | ||
block0(v0: i64, v1: f32): | ||
v2 = load.f32 v0 | ||
v3 = bor v1, v2 | ||
store v3, v0 | ||
return | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; movd %xmm0, %ecx | ||
; orl %ecx, 0(%rdi) | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; movd %xmm0, %ecx | ||
; orl %ecx, (%rdi) ; trap: heap_oob | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
;; %bor1: load an f32 from address v0, `bor` it with the f32 argument v1, and store the result back to v0. | ||
;; The loaded value (v2) is the FIRST operand of the `bor` here; %bor0 covers the other operand order. | ||
;; The expected output that follows uses `movd %xmm0, %ecx` and then a memory-destination `orl`. | ||
function %bor1(i64, f32) { | ||
block0(v0: i64, v1: f32): | ||
v2 = load.f32 v0 | ||
v3 = bor v2, v1 | ||
store v3, v0 | ||
return | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; movd %xmm0, %ecx | ||
; orl %ecx, 0(%rdi) | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; movd %xmm0, %ecx | ||
; orl %ecx, (%rdi) ; trap: heap_oob | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
;; %band0: load an f32 from address v0, `band` it with the f32 argument v1, and store the result back to v0. | ||
;; The loaded value (v2) is the SECOND operand of the `band` here; %band1 covers the other operand order. | ||
;; The expected output that follows uses `movd %xmm0, %ecx` and then a memory-destination `andl`. | ||
function %band0(i64, f32) { | ||
block0(v0: i64, v1: f32): | ||
v2 = load.f32 v0 | ||
v3 = band v1, v2 | ||
store v3, v0 | ||
return | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; movd %xmm0, %ecx | ||
; andl %ecx, 0(%rdi) | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; movd %xmm0, %ecx | ||
; andl %ecx, (%rdi) ; trap: heap_oob | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
;; %band1: load an f32 from address v0, `band` it with the f32 argument v1, and store the result back to v0. | ||
;; The loaded value (v2) is the FIRST operand of the `band` here; %band0 covers the other operand order. | ||
;; The expected output that follows uses `movd %xmm0, %ecx` and then a memory-destination `andl`. | ||
function %band1(i64, f32) { | ||
block0(v0: i64, v1: f32): | ||
v2 = load.f32 v0 | ||
v3 = band v2, v1 | ||
store v3, v0 | ||
return | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; movd %xmm0, %ecx | ||
; andl %ecx, 0(%rdi) | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; movd %xmm0, %ecx | ||
; andl %ecx, (%rdi) ; trap: heap_oob | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
;; %bxor0: load an f32 from address v0, `bxor` it with the f32 argument v1, and store the result back to v0. | ||
;; The loaded value (v2) is the SECOND operand of the `bxor` here; %bxor1 covers the other operand order. | ||
;; The expected output that follows uses `movd %xmm0, %ecx` and then a memory-destination `xorl`. | ||
function %bxor0(i64, f32) { | ||
block0(v0: i64, v1: f32): | ||
v2 = load.f32 v0 | ||
v3 = bxor v1, v2 | ||
store v3, v0 | ||
return | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; movd %xmm0, %ecx | ||
; xorl %ecx, 0(%rdi) | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; movd %xmm0, %ecx | ||
; xorl %ecx, (%rdi) ; trap: heap_oob | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
;; %bxor1: load an f32 from address v0, `bxor` it with the f32 argument v1, and store the result back to v0. | ||
;; The loaded value (v2) is the FIRST operand of the `bxor` here; %bxor0 covers the other operand order. | ||
;; The expected output that follows uses `movd %xmm0, %ecx` and then a memory-destination `xorl`. | ||
function %bxor1(i64, f32) { | ||
block0(v0: i64, v1: f32): | ||
v2 = load.f32 v0 | ||
v3 = bxor v2, v1 | ||
store v3, v0 | ||
return | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; movd %xmm0, %ecx | ||
; xorl %ecx, 0(%rdi) | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; movd %xmm0, %ecx | ||
; xorl %ecx, (%rdi) ; trap: heap_oob | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|