Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x64: Peephole optimization for x < 0 #4625

Merged
merged 4 commits into from
Aug 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1501,6 +1501,38 @@
(rule (lower (icmp cc a @ (value_type $I128) b))
(lower_icmp_bool (emit_cmp cc a b)))

;; Peephole optimization for `x < 0`, when x is a signed 64 bit value.
;;
;; A signed value is negative exactly when its sign bit (bit 63) is set, so
;; shifting `x` right by 63 moves the sign bit into bit 0 and yields the
;; `b1` result (1 if negative, 0 otherwise) with a single shift instead of a
;; compare. The rule only matches when the right-hand operand is the
;; constant 0.
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I64) (u64_from_iconst 0))))
(x64_shr $I64 x (Imm8Reg.Imm8 63)))

;; Peephole optimization for `0 > x`, when x is a signed 64 bit value.
;;
;; `0 > x` is the operand-swapped spelling of `x < 0`, so the lowering is the
;; same: shift `x` right by 63 so that its sign bit (bit 63) ends up in
;; bit 0. The rule only matches when the left-hand operand is the
;; constant 0.
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I64))))
(x64_shr $I64 x (Imm8Reg.Imm8 63)))

;; Peephole optimization for `0 <= x`, when x is a signed 64 bit value.
;;
;; `0 <= x` holds exactly when the sign bit (bit 63) of `x` is clear. The
;; bitwise NOT flips the sign bit, and the shift right by 63 then moves the
;; flipped bit into bit 0, giving 1 for non-negative `x` and 0 otherwise.
;; The rule only matches when the left-hand operand is the constant 0.
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I64))))
(x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))

;; Peephole optimization for `x >= 0`, when x is a signed 64 bit value.
;;
;; `x >= 0` is the operand-swapped spelling of `0 <= x`: it holds exactly
;; when the sign bit (bit 63) of `x` is clear. The bitwise NOT flips the sign
;; bit and the shift right by 63 moves the flipped bit into bit 0. The rule
;; only matches when the right-hand operand is the constant 0.
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64) (u64_from_iconst 0))))
(x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))

;; Peephole optimization for `x < 0`, when x is a signed 32 bit value.
;;
;; Same idea as the 64 bit rule, but the sign bit of an `i32` is bit 31, so
;; the shift is a 32 bit shift right by 31. The rule only matches when the
;; right-hand operand is the constant 0.
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I32) (u64_from_iconst 0))))
(x64_shr $I32 x (Imm8Reg.Imm8 31)))

;; Peephole optimization for `0 > x`, when x is a signed 32 bit value.
;;
;; `0 > x` is the operand-swapped spelling of `x < 0`; the lowering is a
;; 32 bit shift right by 31, which moves the `i32` sign bit (bit 31) into
;; bit 0. The rule only matches when the left-hand operand is the
;; constant 0.
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I32))))
(x64_shr $I32 x (Imm8Reg.Imm8 31)))

;; Peephole optimization for `0 <= x`, when x is a signed 32 bit value.
;;
;; `0 <= x` holds exactly when the sign bit (bit 31) of `x` is clear. The
;; bitwise NOT flips that bit and the 32 bit shift right by 31 moves the
;; flipped bit into bit 0. The rule only matches when the left-hand operand
;; is the constant 0.
;;
;; NOTE(review): the NOT is requested with type `$I64` while the shift uses
;; `$I32`. The result is still correct because only bit 31 of the inverted
;; value is consumed by the 32 bit shift, but `$I32` would be the consistent
;; choice. Changing it alters the emitted instruction, so the precise-output
;; filetests for this rule would need to be regenerated -- confirm before
;; changing.
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I32))))
(x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))

;; Peephole optimization for `x >= 0`, when x is a signed 32 bit value.
;;
;; `x >= 0` holds exactly when the sign bit (bit 31) of `x` is clear. The
;; bitwise NOT flips that bit and the 32 bit shift right by 31 moves the
;; flipped bit into bit 0. The rule only matches when the right-hand operand
;; is the constant 0.
;;
;; NOTE(review): the NOT is requested with type `$I64` while the shift uses
;; `$I32`. The result is still correct because only bit 31 of the inverted
;; value is consumed by the 32 bit shift, but `$I32` would be the consistent
;; choice. Changing it alters the emitted instruction, so the precise-output
;; filetests for this rule would need to be regenerated -- confirm before
;; changing.
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I32) (u64_from_iconst 0))))
(x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))

;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
;; one. To note: what is different here about the output values is that each
;; lane will be filled with all 1s or all 0s according to the comparison,
Expand Down
132 changes: 132 additions & 0 deletions cranelift/filetests/filetests/isa/x64/b1.clif
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,135 @@ block2:
; popq %rbp
; ret

;; `icmp slt x, 0` on an i64, returned directly as a b1. The expected output
;; below contains a single `shrq $63` of the argument and no compare.
function %test_x_slt_0_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp slt v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp slt x, 0` on an i32, returned directly as a b1. The expected output
;; below contains a single 32 bit `shrl $31` of the argument and no compare.
function %test_x_slt_0_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp slt v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp sgt 0, x` on an i64 (constant on the left), returned directly as a
;; b1. The expected output is the same single `shrq $63` as for `x < 0`.
function %test_0_sgt_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sgt v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp sgt 0, x` on an i32 (constant on the left), returned directly as a
;; b1. The expected output is the same single `shrl $31` as for `x < 0`.
function %test_0_sgt_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sgt v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp sle 0, x` on an i64 (constant on the left), returned directly as a
;; b1. The expected output inverts the argument with `notq` and then
;; extracts the (flipped) sign bit with `shrq $63`; no compare is emitted.
function %test_0_sle_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sle v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp sle 0, x` on an i32 (constant on the left), returned directly as a
;; b1. The expected output inverts the argument and then extracts the
;; (flipped) sign bit with a 32 bit `shrl $31`; no compare is emitted.
;; NOTE(review): the inversion is a 64 bit `notq` although the operand is an
;; i32; this follows from the lowering rule passing `$I64` to `x64_not`.
function %test_0_sle_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sle v1, v0
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp sge x, 0` on an i64, returned directly as a b1. The expected output
;; inverts the argument with `notq` and then extracts the (flipped) sign bit
;; with `shrq $63`; no compare is emitted.
;; NOTE(review): the function name says `x_sge_x`, but the body compares
;; against the constant 0; `x_sge_0` would match the other tests' naming.
function %test_x_sge_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sge v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp sge x, 0` on an i32, returned directly as a b1. The expected output
;; inverts the argument and then extracts the (flipped) sign bit with a
;; 32 bit `shrl $31`; no compare is emitted.
;; NOTE(review): the function name says `x_sge_x`, but the body compares
;; against the constant 0; `x_sge_0` would match the other tests' naming.
;; NOTE(review): the inversion is a 64 bit `notq` although the operand is an
;; i32; this follows from the lowering rule passing `$I64` to `x64_not`.
function %test_x_sge_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sge v0, v1
return v2
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

60 changes: 60 additions & 0 deletions cranelift/filetests/filetests/isa/x64/branches.clif
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,63 @@ block2:
; popq %rbp
; ret

;; `icmp slt x, 0` on an i64 whose result feeds a `brnz` instead of being
;; returned. The expected output keeps the compare-and-branch form
;; (`cmpq $0` followed by `jl`) and contains no shift, i.e. the `x < 0`
;; shift peephole is not expected to apply to a branch condition.
function %f6(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp slt v0, v1
brnz v2, block1
jump block2
block1:
v3 = bconst.b1 true
return v3
block2:
v4 = bconst.b1 false
return v4
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpq $0, %rdi
; jl label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret

;; `icmp slt x, 0` on an i32 whose result feeds a `brnz` instead of being
;; returned. The expected output keeps the compare-and-branch form
;; (`cmpl $0` followed by `jl`) and contains no shift, i.e. the `x < 0`
;; shift peephole is not expected to apply to a branch condition.
function %f7(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp slt v0, v1
brnz v2, block1
jump block2
block1:
v3 = bconst.b1 true
return v3
block2:
v4 = bconst.b1 false
return v4
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpl $0, %edi
; jl label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret