From 06381668847d946ff55f9b0a68ef3e5ae6d4ad35 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 4 Oct 2023 07:40:00 -0700 Subject: [PATCH] Favor sign-extension in equality comparisons --- cranelift/codegen/src/isa/riscv64/inst.isle | 10 ++ .../filetests/isa/riscv64/condops.clif | 16 +-- .../filetests/isa/riscv64/select-float.clif | 40 +++---- .../filetests/isa/riscv64/select.clif | 106 +++++++----------- 4 files changed, 75 insertions(+), 97 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 925d86ea041b..9efb01abc7fd 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -2707,11 +2707,21 @@ ;; that's returned. Note that comparisons compare full registers so ;; sign-extension according to the integer comparison performed here is ;; required. +;; +;; Also note that as a small optimization `Equal` and `NotEqual` use +;; sign-extension for 32-bit values since the same result is produced with +;; either zero- or sign-extension and many values are already sign-extended given +;; the RV64 instruction set (e.g. `addw` adds 32-bit values and sign-extends), +;; theoretically resulting in more efficient codegen. 
(rule 2 (lower_int_compare (maybe_uextend (icmp cc a b @ (value_type (fits_in_64 in_ty))))) (int_compare cc (zext a) (zext b))) (rule 3 (lower_int_compare (maybe_uextend (icmp cc a b @ (value_type (fits_in_64 in_ty))))) (if (signed_cond_code cc)) (int_compare cc (sext a) (sext b))) +(rule 4 (lower_int_compare (maybe_uextend (icmp cc @ (IntCC.Equal) a b @ (value_type $I32)))) + (int_compare cc (sext a) (sext b))) +(rule 4 (lower_int_compare (maybe_uextend (icmp cc @ (IntCC.NotEqual) a b @ (value_type $I32)))) + (int_compare cc (sext a) (sext b))) ;; If the input is an `fcmp` then the `FCmp` return value is directly ;; convertible to `IntegerCompare` which can shave off an instruction from the diff --git a/cranelift/filetests/filetests/isa/riscv64/condops.clif b/cranelift/filetests/filetests/isa/riscv64/condops.clif index 500f22cd794a..6dbba5087c05 100644 --- a/cranelift/filetests/filetests/isa/riscv64/condops.clif +++ b/cranelift/filetests/filetests/isa/riscv64/condops.clif @@ -109,21 +109,17 @@ block0(v0: i32, v1: i8, v2: i8): ; VCode: ; block0: ; li a3,42 -; slli a5,a0,32 -; srli a4,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select a0,a1,a2##condition=(a4 eq a5) +; sext.w a5,a0 +; sext.w a3,a3 +; select a0,a1,a2##condition=(a5 eq a3) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a4, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; bne a4, a5, 0xc +; sext.w a5, a0 +; sext.w a3, a3 +; bne a5, a3, 0xc ; mv a0, a1 ; j 8 ; mv a0, a2 diff --git a/cranelift/filetests/filetests/isa/riscv64/select-float.clif b/cranelift/filetests/filetests/isa/riscv64/select-float.clif index 284e4f61823b..3e1b7ffceff1 100644 --- a/cranelift/filetests/filetests/isa/riscv64/select-float.clif +++ b/cranelift/filetests/filetests/isa/riscv64/select-float.clif @@ -121,22 +121,18 @@ block0(v0: i32, v1: f32, v2: f32): ; VCode: ; block0: -; li a3,42 -; slli a5,a0,32 -; srli a1,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select 
fa0,fa0,fa1##condition=(a1 eq a5) +; li a1,42 +; sext.w a5,a0 +; sext.w a1,a1 +; select fa0,fa0,fa1##condition=(a5 eq a1) ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a1, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; beq a1, a5, 8 +; addi a1, zero, 0x2a +; sext.w a5, a0 +; sext.w a1, a1 +; beq a5, a1, 8 ; fmv.d fa0, fa1 ; ret @@ -150,22 +146,18 @@ block0(v0: i32, v1: f64, v2: f64): ; VCode: ; block0: -; li a3,42 -; slli a5,a0,32 -; srli a1,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select fa0,fa0,fa1##condition=(a1 eq a5) +; li a1,42 +; sext.w a5,a0 +; sext.w a1,a1 +; select fa0,fa0,fa1##condition=(a5 eq a1) ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a1, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; beq a1, a5, 8 +; addi a1, zero, 0x2a +; sext.w a5, a0 +; sext.w a1, a1 +; beq a5, a1, 8 ; fmv.d fa0, fa1 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/select.clif b/cranelift/filetests/filetests/isa/riscv64/select.clif index 25be44f463a3..57b9ec24c866 100644 --- a/cranelift/filetests/filetests/isa/riscv64/select.clif +++ b/cranelift/filetests/filetests/isa/riscv64/select.clif @@ -365,21 +365,17 @@ block0(v0: i32, v1: i8, v2: i8): ; VCode: ; block0: ; li a3,42 -; slli a5,a0,32 -; srli a4,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select a0,a1,a2##condition=(a4 eq a5) +; sext.w a5,a0 +; sext.w a3,a3 +; select a0,a1,a2##condition=(a5 eq a3) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a4, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; bne a4, a5, 0xc +; sext.w a5, a0 +; sext.w a3, a3 +; bne a5, a3, 0xc ; mv a0, a1 ; j 8 ; mv a0, a2 @@ -396,21 +392,17 @@ block0(v0: i32, v1: i16, v2: i16): ; VCode: ; block0: ; li a3,42 -; slli a5,a0,32 -; srli a4,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select a0,a1,a2##condition=(a4 eq a5) +; sext.w a5,a0 +; sext.w a3,a3 +; select 
a0,a1,a2##condition=(a5 eq a3) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a4, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; bne a4, a5, 0xc +; sext.w a5, a0 +; sext.w a3, a3 +; bne a5, a3, 0xc ; mv a0, a1 ; j 8 ; mv a0, a2 @@ -427,21 +419,17 @@ block0(v0: i32, v1: i32, v2: i32): ; VCode: ; block0: ; li a3,42 -; slli a5,a0,32 -; srli a4,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select a0,a1,a2##condition=(a4 eq a5) +; sext.w a5,a0 +; sext.w a3,a3 +; select a0,a1,a2##condition=(a5 eq a3) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a4, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; bne a4, a5, 0xc +; sext.w a5, a0 +; sext.w a3, a3 +; bne a5, a3, 0xc ; mv a0, a1 ; j 8 ; mv a0, a2 @@ -458,21 +446,17 @@ block0(v0: i32, v1: i64, v2: i64): ; VCode: ; block0: ; li a3,42 -; slli a5,a0,32 -; srli a4,a5,32 -; slli a3,a3,32 -; srli a5,a3,32 -; select a0,a1,a2##condition=(a4 eq a5) +; sext.w a5,a0 +; sext.w a3,a3 +; select a0,a1,a2##condition=(a5 eq a3) ; ret ; ; Disassembled: ; block0: ; offset 0x0 ; addi a3, zero, 0x2a -; slli a5, a0, 0x20 -; srli a4, a5, 0x20 -; slli a3, a3, 0x20 -; srli a5, a3, 0x20 -; bne a4, a5, 0xc +; sext.w a5, a0 +; sext.w a3, a3 +; bne a5, a3, 0xc ; mv a0, a1 ; j 8 ; mv a0, a2 @@ -491,20 +475,18 @@ block0(v0: i32, v1: i128, v2: i128): ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; sd s6,-8(sp) -; sd s7,-16(sp) +; sd s1,-8(sp) +; sd s8,-16(sp) ; add sp,-16 ; block0: -; mv t0,a1 -; li s6,42 -; slli a1,a0,32 -; srli a5,a1,32 -; slli a0,s6,32 -; srli s7,a0,32 -; select [a0,a1],[t0,a2],[a3,a4]##condition=(a5 eq s7) +; mv s8,a1 +; li a5,42 +; sext.w s1,a0 +; sext.w a5,a5 +; select [a0,a1],[s8,a2],[a3,a4]##condition=(s1 eq a5) ; add sp,+16 -; ld s6,-8(sp) -; ld s7,-16(sp) +; ld s1,-8(sp) +; ld s8,-16(sp) ; ld ra,8(sp) ; ld fp,0(sp) ; add sp,+16 @@ -516,25 +498,23 @@ block0(v0: i32, v1: i128, v2: i128): ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, 
sp -; sd s6, -8(sp) -; sd s7, -0x10(sp) +; sd s1, -8(sp) +; sd s8, -0x10(sp) ; addi sp, sp, -0x10 ; block1: ; offset 0x1c -; mv t0, a1 -; addi s6, zero, 0x2a -; slli a1, a0, 0x20 -; srli a5, a1, 0x20 -; slli a0, s6, 0x20 -; srli s7, a0, 0x20 -; bne a5, s7, 0x10 -; mv a0, t0 +; mv s8, a1 +; addi a5, zero, 0x2a +; sext.w s1, a0 +; sext.w a5, a5 +; bne s1, a5, 0x10 +; mv a0, s8 ; mv a1, a2 ; j 0xc ; mv a0, a3 ; mv a1, a4 ; addi sp, sp, 0x10 -; ld s6, -8(sp) -; ld s7, -0x10(sp) +; ld s1, -8(sp) +; ld s8, -0x10(sp) ; ld ra, 8(sp) ; ld s0, 0(sp) ; addi sp, sp, 0x10