Skip to content

Commit

Permalink
add i128 support for iabs on aarch64
Browse files Browse the repository at this point in the history
  • Loading branch information
p3achyjr committed Oct 8, 2023
1 parent fef8a90 commit 225c33c
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 1 deletion.
15 changes: 15 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,13 @@
(rn Reg)
(rm Reg))

;; A conditional-select-invert operation (`csinv`): `rd` receives `rn` when
;; `cond` holds, and otherwise the bitwise complement of `rm` (every bit
;; flipped, like x86 `not`).
(CSInv
(rd WritableReg)
(cond Cond)
(rn Reg)
(rm Reg))

;; A conditional-set operation.
(CSet
(rd WritableReg)
Expand Down Expand Up @@ -2388,6 +2395,14 @@
(MInst.CSNeg dst cond if_true if_false)
dst)))

;; Helper for emitting an `MInst.CSInv` instruction.
;;
;; Allocates a fresh 64-bit temporary as the destination and wraps the
;; instruction in a `ConsumesFlags` that returns that register, so it must be
;; paired with a flag producer (e.g. through `with_flags`). The result is
;; `when_true` if `cond` holds, and `when_false` with all bits flipped
;; otherwise.
(decl csinv (Cond Reg Reg) ConsumesFlags)
(rule (csinv cond when_true when_false)
      (let ((result WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
         (MInst.CSInv result cond when_true when_false)
         result)))

;; Helper for generating `MInst.CCmp` instructions.
;; Creates a new `ProducesFlags` from the supplied `ProducesFlags` followed
;; immediately by the `MInst.CCmp` instruction.
Expand Down
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,12 @@ impl MachInstEmit for Inst {
let rm = allocs.next(rm);
sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
}
// CSINV: resolve the destination and both sources through `allocs`, then
// emit the instruction with a single `put4`.
&Inst::CSInv { rd, rn, rm, cond } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
// Shares `enc_csel` with the neighbouring conditional-select forms; only
// the two trailing selector arguments (`1, 0` here) tell them apart.
sink.put4(enc_csel(rd, rn, rm, cond, 1, 0));
}
&Inst::CSet { rd, cond } => {
let rd = allocs.next_writable(rd);
sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
Expand Down
10 changes: 10 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2420,6 +2420,16 @@ fn test_aarch64_binemit() {
"8A258EDA",
"csneg x10, x12, x14, hs",
));
// CSINV with the same registers and condition as the `csneg` case above;
// the expected encoding differs from that one only in its second byte
// (0x21 here versus 0x25).
insns.push((
Inst::CSInv {
rd: writable_xreg(10),
rn: xreg(12),
rm: xreg(14),
cond: Cond::Hs,
},
"8A218EDA",
"csinv x10, x12, x14, hs",
));
insns.push((
Inst::CSet {
rd: writable_xreg(15),
Expand Down
12 changes: 12 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,11 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rn);
collector.reg_use(rm);
}
// CSINV defines `rd` and reads both `rn` and `rm`. The condition code is
// not a register operand, so it is ignored here.
&Inst::CSInv { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
&Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
collector.reg_def(rd);
}
Expand Down Expand Up @@ -1508,6 +1513,13 @@ impl Inst {
let cond = cond.pretty_print(0, allocs);
format!("csneg {}, {}, {}, {}", rd, rn, rm, cond)
}
// Renders as `csinv rd, rn, rm, cond`, with every register printed at
// 64-bit operand size.
&Inst::CSInv { rd, rn, rm, cond } => {
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs);
let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs);
let cond = cond.pretty_print(0, allocs);
format!("csinv {}, {}, {}, {}", rd, rn, rm, cond)
}
&Inst::CSet { rd, cond } => {
let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs);
let cond = cond.pretty_print(0, allocs);
Expand Down
13 changes: 12 additions & 1 deletion cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@

;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty @ (multi_lane _ _) (iabs x)))
(rule -1 (lower (has_type ty @ (multi_lane _ _) (iabs x)))
(vec_abs x (vector_size ty)))

(rule 2 (lower (has_type $I64 (iabs x)))
Expand All @@ -348,6 +348,17 @@
;; Scalar types of at most 32 bits: sign-extend the operand to 32 bits, then
;; take the absolute value at 32-bit operand size.
(rule 1 (lower (has_type (fits_in_32 ty) (iabs x)))
(abs (OperandSize.Size32) (put_in_reg_sext32 x)))

;; `i128`: branch-free two's-complement absolute value across both halves.
;;
;; `mask` is the sign of the value broadcast to every bit (arithmetic shift of
;; the high half by 63): all-zeros for non-negative inputs, all-ones for
;; negative ones. `(x ^ mask) - mask` then leaves non-negative values
;; untouched and computes `!x + 1 == -x` for negative ones. The subtraction
;; runs as a `subs`/`sbc` pair so that the borrow out of the low half reaches
;; the high half.
;;
;; Selecting `-lo` / `!hi` on `hi > 0` is not sufficient: it negates values
;; whose high half is zero (e.g. `5` would become `-5`), and it drops the
;; carry into the high half whenever the low half is zero (e.g. for
;; `-(1 << 64)`).
(rule (lower (has_type $I128 (iabs x)))
      (let ((x_regs ValueRegs x)
            (x_lo Reg (value_regs_get x_regs 0))
            (x_hi Reg (value_regs_get x_regs 1))
            (mask Reg (asr_imm $I64 x_hi (imm_shift_from_u8 63)))
            (flip_lo Reg (eor $I64 x_lo mask))
            (flip_hi Reg (eor $I64 x_hi mask))
            ;; The flag producer yields the low half and the consumer the high
            ;; half, so `with_flags` returns the registers in lo:hi order.
            (sub_lo ProducesFlags (sub_with_flags_paired $I64 flip_lo mask))
            (sbc_hi ConsumesFlags (sbc_paired $I64 flip_hi mask)))
        (with_flags sub_lo sbc_hi)))

;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I64X2 (avg_round x y)))
Expand Down
20 changes: 20 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/iabs.clif
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,23 @@ block0(v0: i64):
; cneg x0, x0, le
; ret

function %f11(i128) -> i128 {
block0(v0: i128):
v1 = iabs v0
return v1
}

; VCode:
; block0:
; subs xzr, x1, #0
; csneg x0, x0, x0, gt
; csinv x1, x1, x1, gt
; ret
;
; Disassembled:
; block0: ; offset 0x0
; cmp x1, #0
; cneg x0, x0, le
; cinv x1, x1, le
; ret

1 change: 1 addition & 0 deletions cranelift/filetests/filetests/runtests/i128-iabs.clif
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64

Expand Down

0 comments on commit 225c33c

Please sign in to comment.