Skip to content

Commit

Permalink
Add ISLE rules to generate 128-bit comparisons (#9176)
Browse files Browse the repository at this point in the history
This commit adds pattern-matches to detect 128-bit comparisons encoded
as 64-bit comparisons and promotes them to 128-bit comparisons, enabling
backends to apply architecture-specific optimizations to these operations.
  • Loading branch information
alexcrichton authored Aug 28, 2024
1 parent 03003c8 commit eb896ad
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 2 deletions.
47 changes: 45 additions & 2 deletions cranelift/codegen/src/opts/icmp.isle
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@

;; Optimize icmp-of-icmp.
;; ne(icmp(ty, cc, x, y), 0) == icmp(ty, cc, x, y)
;; e.g. neq(ugt(x, y), 0) == ugt(x, y)
;; e.g. neq(ugt(x, y), 0) == ugt(x, y)
;; Testing an `icmp` result (possibly wrapped by `uextend_maybe`) for `!= 0`
;; adds nothing, so the inner comparison subsumes the whole expression.
(rule (simplify
        (ne ty
            (uextend_maybe _ cmp @ (icmp ty _ _ _))
            (iconst_u _ 0)))
      (subsume cmp))

;; eq(icmp(ty, cc, x, y), 0) == icmp(ty, cc_complement, x, y)
;; e.g. eq(ugt(x, y), 0) == ule(x, y)
;; e.g. eq(ugt(x, y), 0) == ule(x, y)
(rule (simplify (eq ty
(uextend_maybe _ (icmp ty cc x y))
(iconst_u _ 0)))
Expand Down Expand Up @@ -213,3 +213,46 @@
(rule (intcc_class (IntCC.SignedGreaterThanOrEqual)) 2)
(rule (intcc_class (IntCC.Equal)) 3)
(rule (intcc_class (IntCC.NotEqual)) 3)

;; Pattern-match what LLVM emits today for 128-bit comparisons into actual
;; 128-bit comparisons. Platforms like x64 and aarch64 have more optimal
;; lowerings for 128-bit arithmetic than the default structure.
;; Unsigned 128-bit `>=` split into 64-bit halves: if the high halves are equal
;; the unsigned `>=` of the low halves decides, otherwise the unsigned `>=` of
;; the high halves does. Rebuild it as a single comparison of concatenated
;; halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (uge ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (uge ty lhs_hi rhs_hi)))
      (uge ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Signed 128-bit `>=` split into 64-bit halves: the low halves are compared
;; unsigned when the high halves are equal, otherwise the signed `>=` of the
;; high halves decides. Rebuild it as a single signed comparison of
;; concatenated halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (uge ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (sge ty lhs_hi rhs_hi)))
      (sge ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Unsigned 128-bit `>` split into 64-bit halves: if the high halves are equal
;; the unsigned `>` of the low halves decides, otherwise the unsigned `>` of
;; the high halves does. Rebuild it as a single comparison of concatenated
;; halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (ugt ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (ugt ty lhs_hi rhs_hi)))
      (ugt ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Signed 128-bit `>` split into 64-bit halves: the low halves are compared
;; unsigned when the high halves are equal, otherwise the signed `>` of the
;; high halves decides. Rebuild it as a single signed comparison of
;; concatenated halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (ugt ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (sgt ty lhs_hi rhs_hi)))
      (sgt ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Unsigned 128-bit `<=` split into 64-bit halves: if the high halves are equal
;; the unsigned `<=` of the low halves decides, otherwise the unsigned `<=` of
;; the high halves does. Rebuild it as a single comparison of concatenated
;; halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (ule ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (ule ty lhs_hi rhs_hi)))
      (ule ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Signed 128-bit `<=` split into 64-bit halves: the low halves are compared
;; unsigned when the high halves are equal, otherwise the signed `<=` of the
;; high halves decides. Rebuild it as a single signed comparison of
;; concatenated halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (ule ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (sle ty lhs_hi rhs_hi)))
      (sle ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Unsigned 128-bit `<` split into 64-bit halves: if the high halves are equal
;; the unsigned `<` of the low halves decides, otherwise the unsigned `<` of
;; the high halves does. Rebuild it as a single comparison of concatenated
;; halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (ult ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (ult ty lhs_hi rhs_hi)))
      (ult ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))

;; Signed 128-bit `<` split into 64-bit halves: the low halves are compared
;; unsigned when the high halves are equal, otherwise the signed `<` of the
;; high halves decides. Rebuild it as a single signed comparison of
;; concatenated halves.
(rule (simplify
        (select ty
                (eq _ lhs_hi @ (value_type $I64) rhs_hi @ (value_type $I64))
                (ult ty lhs_lo @ (value_type $I64) rhs_lo @ (value_type $I64))
                (slt ty lhs_hi rhs_hi)))
      (slt ty
           (iconcat $I64 lhs_lo lhs_hi)
           (iconcat $I64 rhs_lo rhs_hi)))
11 changes: 11 additions & 0 deletions cranelift/codegen/src/opts/selects.isle
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,14 @@
(rule (simplify (bor (ty_vec128 ty) (band ty (bnot ty c) y) (band ty x c))) (bitselect ty c x y))
(rule (simplify (bor (ty_vec128 ty) (band ty y (bnot ty c)) (band ty c x))) (bitselect ty c x y))
(rule (simplify (bor (ty_vec128 ty) (band ty y (bnot ty c)) (band ty x c))) (bitselect ty c x y))

;; Lift an extend operation outside of a `select` if the extend is happening
;; on both the consequent and the alternative.
;; select(c, uextend(x), uextend(y)) => uextend(select(c, x, y)).
;; Both extended inputs must have the same source type (`narrow`), so the
;; select can be performed at that type and extended once afterwards.
(rule (simplify
        (select ty c
                (uextend ty x @ (value_type narrow))
                (uextend ty y @ (value_type narrow))))
      (uextend ty (select narrow c x y)))
;; select(c, sextend(x), sextend(y)) => sextend(select(c, x, y)).
;; Both extended inputs must have the same source type (`narrow`), so the
;; select can be performed at that type and sign-extended once afterwards.
(rule (simplify
        (select ty c
                (sextend ty x @ (value_type narrow))
                (sextend ty y @ (value_type narrow))))
      (sextend ty (select narrow c x y)))
22 changes: 22 additions & 0 deletions cranelift/filetests/filetests/egraph/select.clif
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,25 @@ block0(v0: i32, v1: i32):
; check: v6 = icmp sgt v0, v1
; check: v8 = bmask.i64 v6
; check: return v8

;; Both arms of the select are `uextend.i64` of i32 values. The expected
;; output that follows performs the select on the i32 inputs and applies a
;; single `uextend.i64` to its result.
function %lift_uextend_out_of_select(i8, i32, i32) -> i64 {
block0(v0: i8, v1: i32, v2: i32):
v3 = uextend.i64 v1
v4 = uextend.i64 v2
v5 = select v0, v3, v4
return v5
}
; check: v6 = select v0, v1, v2
; check: v7 = uextend.i64 v6
; check: return v7

;; Both arms of the select are `sextend.i64` of i32 values. The expected
;; output that follows performs the select on the i32 inputs and applies a
;; single `sextend.i64` to its result.
function %lift_sextend_out_of_select(i8, i32, i32) -> i64 {
block0(v0: i8, v1: i32, v2: i32):
v3 = sextend.i64 v1
v4 = sextend.i64 v2
v5 = select v0, v3, v4
return v5
}
; check: v6 = select v0, v1, v2
; check: v7 = sextend.i64 v6
; check: return v7
236 changes: 236 additions & 0 deletions tests/disas/i128-cmp.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
;;! target = "x86_64"
;;! test = "optimize"

(module
;; Every function below compares two 128-bit integers that are passed as
;; 64-bit halves: locals 0 and 1 are the low and high halves of the left
;; operand, locals 2 and 3 the low and high halves of the right operand.
;; Each body pushes the comparison of the low halves, then the comparison of
;; the high halves, then `i64.eq` of the high halves as the `select`
;; condition. `select` therefore yields the low-half result when the high
;; halves are equal and the high-half result otherwise.

;; Signed `<`: low halves compared unsigned, high halves compared signed.
(func $lt_s (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.lt_u
local.get 1
local.get 3
i64.lt_s
local.get 1
local.get 3
i64.eq
select
)
;; Unsigned `<`: both halves compared unsigned.
(func $lt_u (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.lt_u
local.get 1
local.get 3
i64.lt_u
local.get 1
local.get 3
i64.eq
select
)
;; Signed `<=`: low halves compared unsigned, high halves compared signed.
(func $le_s (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.le_u
local.get 1
local.get 3
i64.le_s
local.get 1
local.get 3
i64.eq
select
)
;; Unsigned `<=`: both halves compared unsigned.
(func $le_u (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.le_u
local.get 1
local.get 3
i64.le_u
local.get 1
local.get 3
i64.eq
select
)
;; Signed `>`: low halves compared unsigned, high halves compared signed.
(func $gt_s (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.gt_u
local.get 1
local.get 3
i64.gt_s
local.get 1
local.get 3
i64.eq
select
)
;; Unsigned `>`: both halves compared unsigned.
(func $gt_u (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.gt_u
local.get 1
local.get 3
i64.gt_u
local.get 1
local.get 3
i64.eq
select
)
;; Signed `>=`: low halves compared unsigned, high halves compared signed.
(func $ge_s (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.ge_u
local.get 1
local.get 3
i64.ge_s
local.get 1
local.get 3
i64.eq
select
)
;; Unsigned `>=`: both halves compared unsigned.
(func $ge_u (param i64 i64 i64 i64) (result i32)
local.get 0
local.get 2
i64.ge_u
local.get 1
local.get 3
i64.ge_u
local.get 1
local.get 3
i64.eq
select
)
)
;; function u0:0(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @0034 jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp slt v16, v17
;; v20 = uextend.i32 v18
;; @0034 return v20
;; }
;;
;; function u0:1(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @0047 jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp ult v16, v17
;; v20 = uextend.i32 v18
;; @0047 return v20
;; }
;;
;; function u0:2(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @005a jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp sle v16, v17
;; v20 = uextend.i32 v18
;; @005a return v20
;; }
;;
;; function u0:3(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @006d jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp ule v16, v17
;; v20 = uextend.i32 v18
;; @006d return v20
;; }
;;
;; function u0:4(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @0080 jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp sgt v16, v17
;; v20 = uextend.i32 v18
;; @0080 return v20
;; }
;;
;; function u0:5(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @0093 jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp ugt v16, v17
;; v20 = uextend.i32 v18
;; @0093 return v20
;; }
;;
;; function u0:6(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @00a6 jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp sge v16, v17
;; v20 = uextend.i32 v18
;; @00a6 return v20
;; }
;;
;; function u0:7(i64 vmctx, i64, i64, i64, i64, i64) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64):
;; @00b9 jump block1
;;
;; block1:
;; v16 = iconcat.i64 v2, v3
;; v17 = iconcat.i64 v4, v5
;; v18 = icmp uge v16, v17
;; v20 = uextend.i32 v18
;; @00b9 return v20
;; }

0 comments on commit eb896ad

Please sign in to comment.