diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index 56a03c51cb8f..2cd101be3936 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -1629,11 +1629,23 @@
 
 ;; Put a value into a GPR.
 ;;
-;; Asserts that the value goes into a GPR.
+;; Values whose type naturally lives in an XMM register are bitcast into a GPR
+;; first; values whose type already lives in a GPR are used as-is.
 (decl put_in_gpr (Value) Gpr)
+
+;; Case for a type in the GPR register class: wrap the value's register as-is.
 (rule (put_in_gpr val)
+      (if-let (value_type ty) val)
+      (if-let (type_register_class (RegisterClass.Gpr _)) ty)
       (gpr_new (put_in_reg val)))
 
+;; Case for a type in the XMM register class: put the value in an XMM register
+;; and then bitcast it into a GPR via `bitcast_xmm_to_gpr`.
+(rule (put_in_gpr val)
+      (if-let (value_type ty) val)
+      (if-let (type_register_class (RegisterClass.Xmm)) ty)
+      (bitcast_xmm_to_gpr ty (xmm_new (put_in_reg val))))
+
 ;; Put a value into a `GprMem`.
 ;;
 ;; Asserts that the value goes into a GPR.
;; New x86_64 `test compile precise-output` filetest.
;;
;; Each of the six functions loads an f32 from the address in v0, combines it
;; with the f32 argument v1 using bor/band/bxor (once per operand order), and
;; stores the result back to the same address. The expected output for every
;; function moves the f32 argument from %xmm0 into %ecx with `movd` and applies
;; the 32-bit or/and/xor directly to the memory operand at (%rdi).
;;
;; NOTE(review): the `;` lines inside the hunk are golden output that the test
;; compares against, not free-form comments. Their column spacing could not be
;; recovered from the whitespace-collapsed original and is reconstructed here;
;; regenerate the expectations with the filetest tooling before relying on it.
diff --git a/cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif b/cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif
new file mode 100644
index 000000000000..085868bda22e
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/sink-load-store-of-bitwise-op-on-float.clif
@@ -0,0 +1,177 @@
+test compile precise-output
+target x86_64
+
+function %bor0(i64, f32) {
+block0(v0: i64, v1: f32):
+    v2 = load.f32 v0
+    v3 = bor v1, v2
+    store v3, v0
+    return
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movd %xmm0, %ecx
+;   orl %ecx, 0(%rdi)
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movd %xmm0, %ecx
+;   orl %ecx, (%rdi) ; trap: heap_oob
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %bor1(i64, f32) {
+block0(v0: i64, v1: f32):
+    v2 = load.f32 v0
+    v3 = bor v2, v1
+    store v3, v0
+    return
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movd %xmm0, %ecx
+;   orl %ecx, 0(%rdi)
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movd %xmm0, %ecx
+;   orl %ecx, (%rdi) ; trap: heap_oob
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %band0(i64, f32) {
+block0(v0: i64, v1: f32):
+    v2 = load.f32 v0
+    v3 = band v1, v2
+    store v3, v0
+    return
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movd %xmm0, %ecx
+;   andl %ecx, 0(%rdi)
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movd %xmm0, %ecx
+;   andl %ecx, (%rdi) ; trap: heap_oob
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %band1(i64, f32) {
+block0(v0: i64, v1: f32):
+    v2 = load.f32 v0
+    v3 = band v2, v1
+    store v3, v0
+    return
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movd %xmm0, %ecx
+;   andl %ecx, 0(%rdi)
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movd %xmm0, %ecx
+;   andl %ecx, (%rdi) ; trap: heap_oob
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %bxor0(i64, f32) {
+block0(v0: i64, v1: f32):
+    v2 = load.f32 v0
+    v3 = bxor v1, v2
+    store v3, v0
+    return
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movd %xmm0, %ecx
+;   xorl %ecx, 0(%rdi)
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movd %xmm0, %ecx
+;   xorl %ecx, (%rdi) ; trap: heap_oob
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %bxor1(i64, f32) {
+block0(v0: i64, v1: f32):
+    v2 = load.f32 v0
+    v3 = bxor v2, v1
+    store v3, v0
+    return
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movd %xmm0, %ecx
+;   xorl %ecx, 0(%rdi)
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movd %xmm0, %ecx
+;   xorl %ecx, (%rdi) ; trap: heap_oob
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+