From 3525dcdb1d416da6875810c85855236de52006f4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 5 Sep 2024 08:27:52 -0700 Subject: [PATCH] aarch64: Add special-case for widening multiplication This commit adds a special case to the lowering of 128-bit multiplication on the aarch64 backend along the same lines as was done in #9136 for the x64 backend. Notably zero and sign-extended values which are multiplied to produce a 128-bit result can skip some of the arithmetic of the fully general 128-bit lowering. --- cranelift/codegen/src/isa/aarch64/lower.isle | 20 ++++++++ .../filetests/filetests/isa/aarch64/i128.clif | 48 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 cranelift/filetests/filetests/isa/aarch64/i128.clif diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 415d408549b6..7d18b602eed5 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -820,6 +820,26 @@ (dst_lo Reg (madd $I64 x_lo y_lo (zero_reg)))) (value_regs dst_lo dst_hi))) +;; Special cases where the upper bits are the sign- or zero-extension of the lower bits, +;; so the calculation here is much simpler with just a `umulh` or `smulh` +;; instead of the additions above as well. +(rule (lower (has_type $I128 (imul (uextend x) (uextend y)))) + (let ( + (x Reg (put_in_reg_zext64 x)) + (y Reg (put_in_reg_zext64 y)) + ) + (value_regs + (madd $I64 x y (zero_reg)) + (umulh $I64 x y)))) +(rule (lower (has_type $I128 (imul (sextend x) (sextend y)))) + (let ( + (x Reg (put_in_reg_sext64 x)) + (y Reg (put_in_reg_sext64 y)) + ) + (value_regs + (madd $I64 x y (zero_reg)) + (smulh $I64 x y)))) + +;; Case for i8x16, i16x8, and i32x4. 
(rule -2 (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y))) (mul x y (vector_size ty))) diff --git a/cranelift/filetests/filetests/isa/aarch64/i128.clif b/cranelift/filetests/filetests/isa/aarch64/i128.clif new file mode 100644 index 000000000000..bee69e094474 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/i128.clif @@ -0,0 +1,48 @@ +test compile precise-output +set enable_llvm_abi_extensions=true +target aarch64 + +function %mul_uextend_i64(i64, i64) -> i128 { +block0(v0: i64, v1: i64): + v2 = uextend.i128 v0 + v3 = uextend.i128 v1 + v4 = imul v2, v3 + return v4 +} + +; VCode: +; block0: +; madd x3, x0, x1, xzr +; umulh x1, x0, x1 +; mov x0, x3 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; mul x3, x0, x1 +; umulh x1, x0, x1 +; mov x0, x3 +; ret + +function %mul_sextend_i64(i64, i64) -> i128 { +block0(v0: i64, v1: i64): + v2 = sextend.i128 v0 + v3 = sextend.i128 v1 + v4 = imul v2, v3 + return v4 +} + +; VCode: +; block0: +; madd x3, x0, x1, xzr +; smulh x1, x0, x1 +; mov x0, x3 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; mul x3, x0, x1 +; smulh x1, x0, x1 +; mov x0, x3 +; ret +