From c7be23545cb8ab5583e2cd2662dddae2d82faeb3 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 3 Feb 2024 18:26:44 +0000 Subject: [PATCH] x64: Add `{u,s}mulhi.i8` instruction support --- cranelift/codegen/src/isa/x64/lower.isle | 38 +++++++++---------- .../filetests/filetests/isa/x64/smulhi.clif | 31 +++++++++++++++ .../filetests/filetests/isa/x64/umulhi.clif | 31 +++++++++++++++ .../filetests/filetests/runtests/smulhi.clif | 9 +++++ .../filetests/filetests/runtests/umulhi.clif | 8 ++++ cranelift/fuzzgen/src/function_generator.rs | 2 - 6 files changed, 97 insertions(+), 22 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 2452966a131d..516e3151ba6c 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -4159,35 +4159,33 @@ ;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (umulhi a @ (value_type $I16) b)) - (let ((res ValueRegs (mul_hi $I16 $false a b)) - (hi Gpr (value_regs_get_gpr res 1))) - hi)) - -(rule (lower (umulhi a @ (value_type $I32) b)) - (let ((res ValueRegs (mul_hi $I32 $false a b)) - (hi Gpr (value_regs_get_gpr res 1))) +;; The umulhi instruction is not available for 8-bit types, so we can extend +;; the inputs, use the 16-bit multiply and shift the result down. +(rule 1 (lower (umulhi a @ (value_type $I8) b)) + (let ((a_ext Gpr (extend_to_gpr a $I16 (ExtendKind.Zero))) + (b_ext Gpr (extend_to_gpr b $I16 (ExtendKind.Zero))) + (mul Gpr (x64_mul $I16 a_ext b_ext)) + (hi Gpr (x64_shr $I64 mul (imm8_to_imm8_gpr 8)))) hi)) -(rule (lower (umulhi a @ (value_type $I64) b)) - (let ((res ValueRegs (mul_hi $I64 $false a b)) +(rule 0 (lower (umulhi a @ (value_type ty) b)) + (let ((res ValueRegs (mul_hi ty $false a b)) (hi Gpr (value_regs_get_gpr res 1))) hi)) ;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (smulhi a @ (value_type $I16) b)) - (let ((res ValueRegs (mul_hi $I16 $true a b)) - (hi Gpr (value_regs_get_gpr res 1))) - hi)) - -(rule (lower (smulhi a @ (value_type $I32) b)) - (let ((res ValueRegs (mul_hi $I32 $true a b)) - (hi Gpr (value_regs_get_gpr res 1))) +;; The smulhi instruction is not available for 8-bit types, so we can extend +;; the inputs, use the 16-bit multiply and shift the result down. +(rule 1 (lower (smulhi a @ (value_type $I8) b)) + (let ((a_ext Gpr (extend_to_gpr a $I16 (ExtendKind.Sign))) + (b_ext Gpr (extend_to_gpr b $I16 (ExtendKind.Sign))) + (mul Gpr (x64_mul $I16 a_ext b_ext)) + (hi Gpr (x64_sar $I64 mul (imm8_to_imm8_gpr 8)))) hi)) -(rule (lower (smulhi a @ (value_type $I64) b)) - (let ((res ValueRegs (mul_hi $I64 $true a b)) +(rule 0 (lower (smulhi a @ (value_type ty) b)) + (let ((res ValueRegs (mul_hi ty $true a b)) (hi Gpr (value_regs_get_gpr res 1))) hi)) diff --git a/cranelift/filetests/filetests/isa/x64/smulhi.clif b/cranelift/filetests/filetests/isa/x64/smulhi.clif index 25c6db99c8a2..fb8356e4b6c2 100644 --- a/cranelift/filetests/filetests/isa/x64/smulhi.clif +++ b/cranelift/filetests/filetests/isa/x64/smulhi.clif @@ -1,6 +1,37 @@ test compile precise-output target x86_64 +function %smulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = smulhi v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbl %dil, %eax +; movsbl %sil, %r8d +; imull %eax, %r8d, %eax +; sarq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movsbl %dil, %eax +; movsbl %sil, %r8d +; imull %r8d, %eax +; sarq $8, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + function %f1(i16, i16) -> i16 { block0(v0: i16, v1: i16): v2 = smulhi v0, v1 diff --git a/cranelift/filetests/filetests/isa/x64/umulhi.clif b/cranelift/filetests/filetests/isa/x64/umulhi.clif index e910de46ee6f..487d2a897b51 100644 --- a/cranelift/filetests/filetests/isa/x64/umulhi.clif +++ b/cranelift/filetests/filetests/isa/x64/umulhi.clif @@ -1,6 +1,37 @@ test compile precise-output target x86_64 +function %umulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = umulhi v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbl %dil, %eax +; movzbl %sil, %r8d +; imull %eax, %r8d, %eax +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbl %dil, %eax +; movzbl %sil, %r8d +; imull %r8d, %eax +; shrq $8, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + function %f1(i16, i16) -> i16 { block0(v0: i16, v1: i16): v2 = umulhi v0, v1 diff --git a/cranelift/filetests/filetests/runtests/smulhi.clif b/cranelift/filetests/filetests/runtests/smulhi.clif index 52a8d4d0ed98..df9f13adf377 100644 --- a/cranelift/filetests/filetests/runtests/smulhi.clif +++ b/cranelift/filetests/filetests/runtests/smulhi.clif @@ -8,6 +8,15 @@ target riscv64 target riscv64 has_c has_zcb +function %smulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = smulhi v0, v1 + return v2 +} +; run: %smulhi_i8(-2, -4) == 0 +; run: %smulhi_i8(2, -4) == -1 +; run: %smulhi_i8(255, 255) == 0 + function %smulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): v2 = smulhi v0, v1 diff --git a/cranelift/filetests/filetests/runtests/umulhi.clif b/cranelift/filetests/filetests/runtests/umulhi.clif index 464a2bc6700a..51d4eee45d92 100644 --- a/cranelift/filetests/filetests/runtests/umulhi.clif +++ b/cranelift/filetests/filetests/runtests/umulhi.clif @@ -6,6 +6,14 @@ target s390x target riscv64 target riscv64 has_c has_zcb +function %umulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = umulhi v0, v1 + return v2 +} +; run: %umulhi_i8(2, 4) == 0 +; run: %umulhi_i8(255, 255) == 254 + function %umulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): v2 = umulhi v0, v1 diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 743a6a5d6754..6ed5d8412ea0 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -523,8 +523,6 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - rets, (Opcode::UmulOverflow | Opcode::SmulOverflow, &[I128, I128]), (Opcode::Imul, &[I8X16, I8X16]), - // https://github.com/bytecodealliance/wasmtime/issues/5468 - (Opcode::Smulhi | Opcode::Umulhi, &[I8, I8]), // https://github.com/bytecodealliance/wasmtime/issues/4756 (Opcode::Udiv | Opcode::Sdiv, &[I128, I128]), // https://github.com/bytecodealliance/wasmtime/issues/5474