Skip to content

Commit

Permalink
Cranelift: fix branch-of-icmp/fcmp regression: look through uextend.
Browse files Browse the repository at this point in the history
In bytecodealliance#5031, we removed `bool` types from CLIF, using integers instead for
"truthy" values. This greatly simplified the IR, and was generally an
improvement.

However, because x86's `SETcc` instruction sets only the low 8 bits of a
register, we chose to use `i8` types as the result of `icmp` and `fcmp`,
to avoid the need for a masking operation when materializing the result.

Unfortunately this means that uses of truthy values often now have
`uextend` operations, especially when coming from Wasm (where truthy
values are naturally `i32`-typed). For example, where we previously had
`(brz (icmp ...))`, we now have `(brz (uextend (icmp ...)))`.

It's arguable whether or not we should switch to `i32` truthy values --
in most cases we can avoid materializing a value that's immediately used
for a branch or select, so a mask would in most cases be unnecessary,
and it would be a win at the IR level -- but irrespective of that, this
change *did* regress our generated code quality: our backends had
patterns for e.g. `(brz (icmp ...))` but not with the `uextend`, so we
were *always* materializing truthy values. Many blocks thus ended with
"cmp; setcc; cmp; test; branch" rather than "cmp; branch".

In bytecodealliance#5391 we noticed this and fixed it on x64, but it was a general
problem on aarch64 and riscv64 as well. This PR introduces a
`maybe_uextend` extractor that "looks through" uextends, and uses it
where we consume truthy values, thus fixing the regression.  This PR
also adds compile filetests to ensure we don't regress again.
  • Loading branch information
cfallin committed Dec 22, 2022
1 parent 24a2f8c commit 4622d4c
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 62 deletions.
18 changes: 9 additions & 9 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1706,9 +1706,9 @@
;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty
(select (icmp cc
x @ (value_type in_ty)
y)
(select (maybe_uextend (icmp cc
x @ (value_type in_ty)
y))
rn
rm)))
(let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y in_ty)))
Expand All @@ -1719,7 +1719,7 @@
rm)))

(rule (lower (has_type ty
(select (fcmp cc x @ (value_type in_ty) y)
(select (maybe_uextend (fcmp cc x @ (value_type in_ty) y))
rn
rm)))
(let ((cond Cond (fp_cond_code cc)))
Expand Down Expand Up @@ -1757,7 +1757,7 @@
;;;; Rules for `select_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty
(select_spectre_guard (icmp cc x @ (value_type in_ty) y)
(select_spectre_guard (maybe_uextend (icmp cc x @ (value_type in_ty) y))
if_true
if_false)))
(let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y in_ty))
Expand Down Expand Up @@ -2434,7 +2434,7 @@
;;; Rules for `brz`/`brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `brz` following `icmp`
(rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
(rule (lower_branch (brz (maybe_uextend (icmp cc x @ (value_type ty) y)) _ _) targets)
(let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y ty))
;; Negate the condition for `brz`.
(cond Cond (invert_cond (cond_code (flags_and_cc_cc comparison))))
Expand All @@ -2446,7 +2446,7 @@
not_taken
(cond_br_cond cond))))))
;; `brnz` following `icmp`
(rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
(rule (lower_branch (brnz (maybe_uextend (icmp cc x @ (value_type ty) y)) _ _) targets)
(let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y ty))
(cond Cond (cond_code (flags_and_cc_cc comparison)))
(taken BranchTarget (branch_target targets 0))
Expand All @@ -2457,7 +2457,7 @@
not_taken
(cond_br_cond cond))))))
;; `brz` following `fcmp`
(rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
(rule (lower_branch (brz (maybe_uextend (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
(let ((cond Cond (fp_cond_code cc))
(cond Cond (invert_cond cond)) ;; negate for `brz`
(taken BranchTarget (branch_target targets 0))
Expand All @@ -2467,7 +2467,7 @@
(cond_br taken not_taken
(cond_br_cond cond))))))
;; `brnz` following `fcmp`
(rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
(rule (lower_branch (brnz (maybe_uextend (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
(let ((cond Cond (fp_cond_code cc))
(taken BranchTarget (branch_target targets 0))
(not_taken BranchTarget (branch_target targets 1)))
Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1930,11 +1930,11 @@
(lower_brz_or_nz (IntCC.NotEqual) cmp targets $I64)))

(rule 1
(lower_branch (brz (icmp cc a @ (value_type ty) b) _ _) targets)
(lower_branch (brz (maybe_uextend (icmp cc a @ (value_type ty) b)) _ _) targets)
(lower_br_icmp (intcc_inverse cc) a b targets ty))

(rule 1
(lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets)
(lower_branch (brz (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets)
(lower_br_fcmp (floatcc_inverse cc) a b targets ty))

;;;;
Expand All @@ -1950,11 +1950,11 @@
(lower_brz_or_nz (IntCC.NotEqual) cmp targets $I64)))

(rule 1
(lower_branch (brnz (icmp cc a @ (value_type ty) b) _ _) targets)
(lower_branch (brnz (maybe_uextend (icmp cc a @ (value_type ty) b)) _ _) targets)
(lower_br_icmp cc a b targets ty))

(rule 1
(lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets)
(lower_branch (brnz (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets)
(lower_br_fcmp cc a b targets ty))

;;;
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c)) x y))

(rule 1
(lower (has_type ty (select (icmp cc a b) x y)))
(lower (has_type ty (select (maybe_uextend (icmp cc a b)) x y)))
(gen_select_reg cc a b x y))

;;;;; Rules for `bitselect`;;;;;;;;;
Expand Down
39 changes: 15 additions & 24 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1670,22 +1670,22 @@
;; - `CC.BE -> C = 1 OR Z = 1` (below or equal)
;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal)

(rule (lower (has_type ty (select (fcmp (FloatCC.Ordered) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Ordered) a b)) x y)))
(with_flags (x64_ucomis b a) (cmove_from_values ty (CC.NP) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.Unordered) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Unordered) a b)) x y)))
(with_flags (x64_ucomis b a) (cmove_from_values ty (CC.P) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThan) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.GreaterThan) a b)) x y)))
(with_flags (x64_ucomis b a) (cmove_from_values ty (CC.NBE) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThanOrEqual) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y)))
(with_flags (x64_ucomis b a) (cmove_from_values ty (CC.NB) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThan) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y)))
(with_flags (x64_ucomis b a) (cmove_from_values ty (CC.B) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y)))
(with_flags (x64_ucomis b a) (cmove_from_values ty (CC.BE) x y)))

;; Certain FloatCC variants are implemented by flipping the operands of the
Expand All @@ -1699,16 +1699,16 @@
;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g.,
;; to `CC.NBE`), we also avoid these unordered cases.

(rule (lower (has_type ty (select (fcmp (FloatCC.LessThan) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.LessThan) a b)) x y)))
(with_flags (x64_ucomis a b) (cmove_from_values ty (CC.NBE) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.LessThanOrEqual) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.LessThanOrEqual) a b)) x y)))
(with_flags (x64_ucomis a b) (cmove_from_values ty (CC.NB) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThan) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y)))
(with_flags (x64_ucomis a b) (cmove_from_values ty (CC.B) x y)))

(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y)))
(with_flags (x64_ucomis a b) (cmove_from_values ty (CC.BE) x y)))

;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple
Expand All @@ -1724,18 +1724,18 @@
;; More details about the CLIF semantics for `fcmp` are available at
;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp.

(rule (lower (has_type ty (select (fcmp (FloatCC.Equal) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Equal) a b)) x y)))
(with_flags (x64_ucomis a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x)))

(rule (lower (has_type ty (select (fcmp (FloatCC.NotEqual) a b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.NotEqual) a b)) x y)))
(with_flags (x64_ucomis a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))

;; We also can lower `select`s that depend on an `icmp` test, but more simply
;; than the `fcmp` variants above. In these cases, we lower to a `CMP`
;; instruction plus a `CMOV`; recall that `cmove_from_values` here may emit more
;; than one instruction for certain types (e.g., XMM-held, I128).

(rule (lower (has_type ty (select (icmp cc a @ (value_type (fits_in_64 a_ty)) b) x y)))
(rule (lower (has_type ty (select (maybe_uextend (icmp cc a @ (value_type (fits_in_64 a_ty)) b)) x y)))
(let ((size OperandSize (raw_operand_size_of_type a_ty)))
(with_flags (x64_cmp size b a) (cmove_from_values ty cc x y))))

Expand Down Expand Up @@ -2873,20 +2873,11 @@

;; Rules for `brz` and `brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 2 (lower_branch (brz (icmp cc a b) _ _) (two_targets taken not_taken))
(rule 2 (lower_branch (brz (maybe_uextend (icmp cc a b)) _ _) (two_targets taken not_taken))
(let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b))))
(emit_side_effect (jmp_cond_icmp cmp taken not_taken))))

(rule 2 (lower_branch (brz (uextend (icmp cc a b)) _ _) (two_targets taken not_taken))
(let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b))))
(emit_side_effect (jmp_cond_icmp cmp taken not_taken))))


(rule 2 (lower_branch (brz (fcmp cc a b) _ _) (two_targets taken not_taken))
(let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b)))
(emit_side_effect (jmp_cond_fcmp cmp taken not_taken))))

(rule 2 (lower_branch (brz (uextend (fcmp cc a b)) _ _) (two_targets taken not_taken))
(rule 2 (lower_branch (brz (maybe_uextend (fcmp cc a b)) _ _) (two_targets taken not_taken))
(let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b)))
(emit_side_effect (jmp_cond_fcmp cmp taken not_taken))))

Expand Down
15 changes: 15 additions & 0 deletions cranelift/codegen/src/machinst/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,21 @@ macro_rules! isle_lower_prelude_methods {
self.lower_ctx.sink_inst(inst);
}

#[inline]
fn maybe_uextend(&mut self, value: Value) -> Option<Value> {
if let Some(def_inst) = self.def_inst(value) {
if let InstructionData::Unary {
opcode: Opcode::Uextend,
arg,
} = self.lower_ctx.data(def_inst)
{
return Some(*arg);
}
}

Some(value)
}

#[inline]
fn preg_to_reg(&mut self, preg: PReg) -> Reg {
preg.into()
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/prelude_lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@
(decl pure partial is_sinkable_inst (Value) Inst)
(extern constructor is_sinkable_inst is_sinkable_inst)

;; Match a uextend or any other instruction, "seeing through" the uextend if
;; present.
(decl maybe_uextend (Value) Value)
(extern extractor maybe_uextend maybe_uextend)

;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Emit an instruction.
Expand Down
36 changes: 24 additions & 12 deletions cranelift/filetests/filetests/isa/aarch64/condbr.clif
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ block0(v0: i128, v1: i128):
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = icmp eq v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block2

block1:
Expand All @@ -177,7 +178,8 @@ block2:
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = icmp eq v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand Down Expand Up @@ -239,7 +241,8 @@ block1:
function %i128_bricmp_eq(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp eq v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -260,7 +263,8 @@ block1:
function %i128_bricmp_ne(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp ne v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -281,7 +285,8 @@ block1:
function %i128_bricmp_slt(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp slt v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -306,7 +311,8 @@ block1:
function %i128_bricmp_ult(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp ult v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -331,7 +337,8 @@ block1:
function %i128_bricmp_sle(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp sle v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -357,7 +364,8 @@ block1:
function %i128_bricmp_ule(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp ule v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -383,7 +391,8 @@ block1:
function %i128_bricmp_sgt(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp sgt v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -408,7 +417,8 @@ block1:
function %i128_bricmp_ugt(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp ugt v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -433,7 +443,8 @@ block1:
function %i128_bricmp_sge(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp sge v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand All @@ -459,7 +470,8 @@ block1:
function %i128_bricmp_uge(i128, i128) {
block0(v0: i128, v1: i128):
v2 = icmp uge v0, v1
brnz v2, block1
v3 = uextend.i32 v2
brnz v3, block1
jump block1

block1:
Expand Down
29 changes: 29 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/select.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
test compile precise-output
target aarch64

function %f0(i32, i32, i64, i64) -> i64 {
block0(v0: i32, v1: i32, v2: i64, v3: i64):
v4 = icmp eq v0, v1
v5 = uextend.i32 v4
v6 = select.i64 v5, v2, v3
return v6
}

; block0:
; subs wzr, w0, w1
; csel x0, x2, x3, eq
; ret

function %f0(f32, f32, i64, i64) -> i64 {
block0(v0: f32, v1: f32, v2: i64, v3: i64):
v4 = fcmp eq v0, v1
v5 = uextend.i32 v4
v6 = select.i64 v5, v2, v3
return v6
}

; block0:
; fcmp s0, s1
; csel x0, x0, x1, eq
; ret

Loading

0 comments on commit 4622d4c

Please sign in to comment.