Skip to content

Commit

Permalink
aarch64: Add special-case for widening multiplication
Browse files Browse the repository at this point in the history
This commit adds a special case to the lowering of 128-bit
multiplication on the aarch64 backend, along the same lines as was done
in bytecodealliance#9136 for the x64 backend. Notably, when zero-extended or
sign-extended values are multiplied to produce a 128-bit result, the
lowering can skip some of the arithmetic of the fully general 128-bit case.
  • Loading branch information
alexcrichton committed Sep 5, 2024
1 parent df89aa5 commit 3525dcd
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 0 deletions.
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,26 @@
(dst_lo Reg (madd $I64 x_lo y_lo (zero_reg))))
(value_regs dst_lo dst_hi)))

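;; Special cases where both operands are zero-extended or both are
;; sign-extended to 128 bits. The upper half of each operand is then implied by
;; its lower half, so the high half of the product needs only a single `umulh`
;; or `smulh` instead of the extra multiply-adds used by the general rule above.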
(rule (lower (has_type $I128 (imul (uextend x) (uextend y))))
      ;; Unsigned widening multiply: both inputs are zero-extended values, so
      ;; the result is assembled from the low and high halves of a single
      ;; unsigned multiplication of the two zero-extended registers.
      (let ((lhs Reg (put_in_reg_zext64 x))
            (rhs Reg (put_in_reg_zext64 y))
            ;; Low half: `lhs * rhs + 0`.
            (dst_lo Reg (madd $I64 lhs rhs (zero_reg)))
            ;; High half of the unsigned product.
            (dst_hi Reg (umulh $I64 lhs rhs)))
        (value_regs dst_lo dst_hi)))
(rule (lower (has_type $I128 (imul (sextend x) (sextend y))))
      ;; Signed widening multiply: both inputs are sign-extended values, so
      ;; the result is assembled from the low and high halves of a single
      ;; signed multiplication of the two sign-extended registers.
      (let ((lhs Reg (put_in_reg_sext64 x))
            (rhs Reg (put_in_reg_sext64 y))
            ;; Low half: `lhs * rhs + 0`.
            (dst_lo Reg (madd $I64 lhs rhs (zero_reg)))
            ;; High half of the signed product.
            (dst_hi Reg (smulh $I64 lhs rhs)))
        (value_regs dst_lo dst_hi)))

;; 128-bit vector multiply for every lane shape except i64x2 (i.e. i8x16,
;; i16x8, and i32x4): emitted as a single `mul` at the vector size derived from
;; the matched type.
(rule -2 (lower (has_type (ty_vec128 vec_ty @ (not_i64x2)) (imul lhs rhs)))
      (mul lhs rhs (vector_size vec_ty)))
Expand Down
48 changes: 48 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/i128.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
test compile precise-output
set enable_llvm_abi_extensions=true
target aarch64

;; Zero-extends both `i64` arguments to `i128` and returns their `i128`
;; product.
function %mul_uextend_i64(i64, i64) -> i128 {
block0(v0: i64, v1: i64):
v2 = uextend.i128 v0
v3 = uextend.i128 v1
v4 = imul v2, v3
return v4
}

; VCode:
; block0:
; madd x3, x0, x1, xzr
; umulh x1, x0, x1
; mov x0, x3
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mul x3, x0, x1
; umulh x1, x0, x1
; mov x0, x3
; ret

;; Sign-extends both `i64` arguments to `i128` and returns their `i128`
;; product.
function %mul_sextend_i64(i64, i64) -> i128 {
block0(v0: i64, v1: i64):
v2 = sextend.i128 v0
v3 = sextend.i128 v1
v4 = imul v2, v3
return v4
}

; VCode:
; block0:
; madd x3, x0, x1, xzr
; smulh x1, x0, x1
; mov x0, x3
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mul x3, x0, x1
; smulh x1, x0, x1
; mov x0, x3
; ret

0 comments on commit 3525dcd

Please sign in to comment.