From ee3c1700dcfda97383a5319eb47c2ee48f63e9ce Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 24 Jul 2024 10:34:57 -0700 Subject: [PATCH 1/2] Cranelift(x64): Fix lowering for `store(bitop(x, load(addr)), addr)` for bitops on floats x86-64 allows us to do these kinds of read-modify-write operations in one instruction in general, however we also need to ensure that the non-memory operand is in a GPR. Because Cranelift allows `b{and,or,xor}`s on floating point types, that means we might need to insert a move from an XMM to a GPR. Co-Authored-By: Jamey Sharp --- cranelift/codegen/src/isa/x64/inst.isle | 14 +- ...ink-load-store-of-bitwise-op-on-float.clif | 177 ++++++++++++++++++ 2 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 56a03c51cb8f..600826eeceaa 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1629,11 +1629,23 @@ ;; Put a value into a GPR. ;; -;; Asserts that the value goes into a GPR. +;; Moves the value into a GPR if it is a type that would naturally go into an +;; XMM register. (decl put_in_gpr (Value) Gpr) + +;; Case for when the value naturally lives in a GPR. (rule (put_in_gpr val) + (if-let (value_type ty) val) + (if-let (type_register_class (RegisterClass.Gpr $true)) ty) (gpr_new (put_in_reg val))) +;; Case for when the value naturally lives in an XMM register and we must +;; bitcast it from an XMM into a GPR. +(rule (put_in_gpr val) + (if-let (value_type ty) val) + (if-let (type_register_class (RegisterClass.Xmm)) ty) + (bitcast_xmm_to_gpr ty (xmm_new (put_in_reg val)))) + ;; Put a value into a `GprMem`. ;; ;; Asserts that the value goes into a GPR. diff --git a/cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif b/cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif new file mode 100644 index 000000000000..085868bda22e --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif @@ -0,0 +1,177 @@ +test compile precise-output +target x86_64 + +function %bor0(i64, f32) { +block0(v0: i64, v1: f32): + v2 = load.f32 v0 + v3 = bor v1, v2 + store v3, v0 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %xmm0, %ecx +; orl %ecx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movd %xmm0, %ecx +; orl %ecx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %bor1(i64, f32) { +block0(v0: i64, v1: f32): + v2 = load.f32 v0 + v3 = bor v2, v1 + store v3, v0 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %xmm0, %ecx +; orl %ecx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movd %xmm0, %ecx +; orl %ecx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %band0(i64, f32) { +block0(v0: i64, v1: f32): + v2 = load.f32 v0 + v3 = band v1, v2 + store v3, v0 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %xmm0, %ecx +; andl %ecx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movd %xmm0, %ecx +; andl %ecx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %band1(i64, f32) { +block0(v0: i64, v1: f32): + v2 = load.f32 v0 + v3 = band v2, v1 + store v3, v0 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %xmm0, %ecx +; andl %ecx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movd %xmm0, %ecx +; andl %ecx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %bxor0(i64, f32) { +block0(v0: i64, v1: f32): + v2 = load.f32 v0 + v3 = bxor v1, v2 + store v3, v0 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %xmm0, %ecx +; xorl %ecx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movd %xmm0, %ecx +; xorl %ecx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %bxor1(i64, f32) { +block0(v0: i64, v1: f32): + v2 = load.f32 v0 + v3 = bxor v2, v1 + store v3, v0 + return +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %xmm0, %ecx +; xorl %ecx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movd %xmm0, %ecx +; xorl %ecx, (%rdi) ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + From 13304f7159e69d63cfa94a4d21e33894fbc24877 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 24 Jul 2024 16:11:18 -0700 Subject: [PATCH 2/2] Match all GPR values in `put_in_gpr` and let assertion catch multi-reg values Instead of backtracking. --- cranelift/codegen/src/isa/x64/inst.isle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 600826eeceaa..2cd101be3936 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1636,7 +1636,7 @@ ;; Case for when the value naturally lives in a GPR. (rule (put_in_gpr val) (if-let (value_type ty) val) - (if-let (type_register_class (RegisterClass.Gpr $true)) ty) + (if-let (type_register_class (RegisterClass.Gpr _)) ty) (gpr_new (put_in_reg val))) ;; Case for when the value naturally lives in an XMM register and we must