Skip to content

Commit

Permalink
x64: Peephole optimization for x < 0 (#4625)
Browse files Browse the repository at this point in the history
#4625

Fixes #4607
  • Loading branch information
elliottt authored Aug 9, 2022
1 parent a36a52a commit ed7dfd3
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 0 deletions.
32 changes: 32 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1501,6 +1501,38 @@
(rule (lower (icmp cc a @ (value_type $I128) b))
(lower_icmp_bool (emit_cmp cc a b)))

;; `x < 0` for a signed 64-bit `x`, producing a `b1`: the answer is the sign
;; bit of `x`, so a logical right shift by 63 leaves it in bit 0 (1 when `x`
;; is negative, 0 otherwise) without emitting a compare.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThan)
                             val @ (value_type $I64)
                             (u64_from_iconst 0))))
      (x64_shr $I64 val (Imm8Reg.Imm8 63)))

;; `0 > x` for a signed 64-bit `x`, producing a `b1`: true exactly when `x` is
;; negative, so shifting the sign bit down into bit 0 (logical shift right by
;; 63) yields the result directly.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThan)
                             (u64_from_iconst 0)
                             val @ (value_type $I64))))
      (x64_shr $I64 val (Imm8Reg.Imm8 63)))

;; `0 <= x` for a signed 64-bit `x`, producing a `b1`: true exactly when the
;; sign bit of `x` is clear. Invert all bits first so the sign bit becomes 1
;; for non-negative inputs, then shift it down into bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThanOrEqual)
                             (u64_from_iconst 0)
                             val @ (value_type $I64))))
      (x64_shr $I64
               (x64_not $I64 val)
               (Imm8Reg.Imm8 63)))

;; `x >= 0` for a signed 64-bit `x`, producing a `b1`: true exactly when the
;; sign bit of `x` is clear. Bitwise-not `x` so the sign bit is set for
;; non-negative inputs, then logical-shift it right by 63 into bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThanOrEqual)
                             val @ (value_type $I64)
                             (u64_from_iconst 0))))
      (x64_shr $I64
               (x64_not $I64 val)
               (Imm8Reg.Imm8 63)))

;; `x < 0` for a signed 32-bit `x`, producing a `b1`: the sign bit lives in
;; bit 31, so a 32-bit logical right shift by 31 leaves it in bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThan)
                             val @ (value_type $I32)
                             (u64_from_iconst 0))))
      (x64_shr $I32 val (Imm8Reg.Imm8 31)))

;; `0 > x` for a signed 32-bit `x`, producing a `b1`: true exactly when `x` is
;; negative, i.e. when bit 31 is set; a 32-bit logical right shift by 31 moves
;; that bit into bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThan)
                             (u64_from_iconst 0)
                             val @ (value_type $I32))))
      (x64_shr $I32 val (Imm8Reg.Imm8 31)))

;; `0 <= x` for a signed 32-bit `x`, producing a `b1`: true exactly when bit 31
;; of `x` is clear. Invert the bits, then shift bit 31 down into bit 0 with a
;; 32-bit logical right shift.
;;
;; NOTE(review): the `not` is requested at `$I64` width while the shift is
;; `$I32`. Only bit 31 of the inverted value appears to be consumed, so the
;; result looks unaffected -- confirm whether `$I32` was intended here.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThanOrEqual)
                             (u64_from_iconst 0)
                             val @ (value_type $I32))))
      (x64_shr $I32
               (x64_not $I64 val)
               (Imm8Reg.Imm8 31)))

;; `x >= 0` for a signed 32-bit `x`, producing a `b1`: true exactly when bit 31
;; of `x` is clear. Bitwise-not `x`, then use a 32-bit logical right shift by
;; 31 to bring the (now inverted) sign bit into bit 0.
;;
;; NOTE(review): the `not` is requested at `$I64` width while the shift is
;; `$I32`. Only bit 31 of the inverted value appears to be consumed, so the
;; result looks unaffected -- confirm whether `$I32` was intended here.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThanOrEqual)
                             val @ (value_type $I32)
                             (u64_from_iconst 0))))
      (x64_shr $I32
               (x64_not $I64 val)
               (Imm8Reg.Imm8 31)))

;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
;; one. To note: what is different here about the output values is that each
;; lane will be filled with all 1s or all 0s according to the comparison,
Expand Down
132 changes: 132 additions & 0 deletions cranelift/filetests/filetests/isa/x64/b1.clif
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,135 @@ block2:
; popq %rbp
; ret

;; `x < 0` on an i64 returning b1. The expected code contains no compare: the
;; sign bit is moved into bit 0 with a single `shrq $63`.
function %test_x_slt_0_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp slt v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `x < 0` on an i32 returning b1. The expected code contains no compare: the
;; sign bit (bit 31) is moved into bit 0 with a single 32-bit `shrl $31`.
function %test_x_slt_0_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp slt v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `0 > x` on an i64 returning b1 (constant on the left-hand side). The
;; expected code is a single `shrq $63` with no compare.
function %test_0_sgt_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sgt v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `0 > x` on an i32 returning b1 (constant on the left-hand side). The
;; expected code is a single 32-bit `shrl $31` with no compare.
function %test_0_sgt_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sgt v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `0 <= x` on an i64 returning b1 (constant on the left-hand side). The
;; expected code inverts the input with `notq` and then extracts the inverted
;; sign bit with `shrq $63`; no compare is emitted.
function %test_0_sle_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sle v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `0 <= x` on an i32 returning b1 (constant on the left-hand side). The
;; expected code inverts the input and then extracts bit 31 with `shrl $31`;
;; no compare is emitted.
;; NOTE(review): the inversion is a 64-bit `notq` even though the input is
;; i32 and the shift is 32-bit -- confirm this width is intentional.
function %test_0_sle_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sle v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `x >= 0` on an i64 returning b1. The expected code inverts the input with
;; `notq` and then extracts the inverted sign bit with `shrq $63`; no compare
;; is emitted.
;; NOTE(review): the function name reads "x sge x", but the body compares v0
;; against the constant 0 -- the name was presumably meant to be `x_sge_0`.
function %test_x_sge_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sge v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `x >= 0` on an i32 returning b1. The expected code inverts the input and
;; then extracts bit 31 with `shrl $31`; no compare is emitted.
;; NOTE(review): the function name reads "x sge x", but the body compares v0
;; against the constant 0 -- the name was presumably meant to be `x_sge_0`.
;; NOTE(review): the inversion is a 64-bit `notq` even though the input is
;; i32 and the shift is 32-bit -- confirm this width is intentional.
function %test_x_sge_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sge v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

60 changes: 60 additions & 0 deletions cranelift/filetests/filetests/isa/x64/branches.clif
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,63 @@ block2:
; popq %rbp
; ret

;; `x < 0` on an i64 whose result feeds a `brnz`. The expected code still uses
;; a flags-based `cmpq $0` followed by `jl`, not a shift of the sign bit, so
;; this pins down the lowering when the compare is consumed by a branch.
function %f6(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp slt v0, v1
brnz v2, block1
jump block2
block1:
v3 = bconst.b1 true
return v3
block2:
v4 = bconst.b1 false
return v4
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpq $0, %rdi
; jl label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `x < 0` on an i32 whose result feeds a `brnz`. The expected code still uses
;; a flags-based `cmpl $0` followed by `jl`, not a shift of the sign bit, so
;; this pins down the lowering when the compare is consumed by a branch.
function %f7(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp slt v0, v1
brnz v2, block1
jump block2
block1:
v3 = bconst.b1 true
return v3
block2:
v4 = bconst.b1 false
return v4
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpl $0, %edi
; jl label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret

0 comments on commit ed7dfd3

Please sign in to comment.