-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
x64: Implement some minor optimizations related to SIMD lowerings (#8839)

* Add tests for patterns I'm about to optimize

* x64: Optimize vector compare-and-branch

  This commit implements lowering optimizations for the `vall_true` and `vany_true` CLIF instructions when combined with `brif`. This is done in the same manner as `icmp` and `fcmp` combined with `brif`, where the result of the comparison is never materialized into a general-purpose register, which helps lower register pressure and removes some instructions.

* x64: Optimize `vconst` with an all-ones pattern

  This has a single-instruction lowering which doesn't load from memory, so it's probably cheaper than loading all-ones from memory.
- Loading branch information
1 parent
95fee6f
commit ee9e1ca
Showing
3 changed files
with
171 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
;;! target = "x86_64" | ||
;;! test = "compile" | ||
;;! flags = ["-Ccranelift-sse41"] | ||
|
||
;; Each function below takes one v128 parameter, applies a SIMD boolean
;; reduction to it, and feeds the resulting i32 directly into an `if`:
;; the function returns 100 when the reduction is non-zero and 200 otherwise.
;; The reduction result is consumed only by the branch in every function.
(module | ||
;; Branch on `i8x16.all_true` of the parameter.
(func $i8x16.all_true (param v128) (result i32) | ||
local.get 0 | ||
i8x16.all_true | ||
if (result i32) | ||
i32.const 100 | ||
else | ||
i32.const 200 | ||
end | ||
) | ||
|
||
;; Branch on `i16x8.all_true` of the parameter.
(func $i16x8.all_true (param v128) (result i32) | ||
local.get 0 | ||
i16x8.all_true | ||
if (result i32) | ||
i32.const 100 | ||
else | ||
i32.const 200 | ||
end | ||
) | ||
|
||
;; Branch on `i32x4.all_true` of the parameter.
(func $i32x4.all_true (param v128) (result i32) | ||
local.get 0 | ||
i32x4.all_true | ||
if (result i32) | ||
i32.const 100 | ||
else | ||
i32.const 200 | ||
end | ||
) | ||
|
||
;; Branch on `i64x2.all_true` of the parameter.
(func $i64x2.all_true (param v128) (result i32) | ||
local.get 0 | ||
i64x2.all_true | ||
if (result i32) | ||
i32.const 100 | ||
else | ||
i32.const 200 | ||
end | ||
) | ||
|
||
;; Branch on `v128.any_true` of the parameter.
;; NOTE(review): the expected disassembly for this function uses
;; pcmpeqb/pmovmskb/cmpl $0xffff, whereas the all_true functions use ptest;
;; confirm that is the intended lowering with the sse41 flag set above.
(func $v128.any_true (param v128) (result i32) | ||
local.get 0 | ||
v128.any_true | ||
if (result i32) | ||
i32.const 100 | ||
else | ||
i32.const 200 | ||
end | ||
) | ||
) | ||
;; wasm[0]::function[0]::i8x16.all_true: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pxor %xmm7, %xmm7 | ||
;; pcmpeqb %xmm7, %xmm0 | ||
;; ptest %xmm0, %xmm0 | ||
;; je 0x21 | ||
;; 17: movl $0xc8, %eax | ||
;; jmp 0x26 | ||
;; 21: movl $0x64, %eax | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq | ||
;; | ||
;; wasm[0]::function[1]::i16x8.all_true: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pxor %xmm7, %xmm7 | ||
;; pcmpeqw %xmm7, %xmm0 | ||
;; ptest %xmm0, %xmm0 | ||
;; je 0x61 | ||
;; 57: movl $0xc8, %eax | ||
;; jmp 0x66 | ||
;; 61: movl $0x64, %eax | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq | ||
;; | ||
;; wasm[0]::function[2]::i32x4.all_true: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pxor %xmm7, %xmm7 | ||
;; pcmpeqd %xmm7, %xmm0 | ||
;; ptest %xmm0, %xmm0 | ||
;; je 0xa1 | ||
;; 97: movl $0xc8, %eax | ||
;; jmp 0xa6 | ||
;; a1: movl $0x64, %eax | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq | ||
;; | ||
;; wasm[0]::function[3]::i64x2.all_true: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pxor %xmm7, %xmm7 | ||
;; pcmpeqq %xmm7, %xmm0 | ||
;; ptest %xmm0, %xmm0 | ||
;; je 0xe2 | ||
;; d8: movl $0xc8, %eax | ||
;; jmp 0xe7 | ||
;; e2: movl $0x64, %eax | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq | ||
;; | ||
;; wasm[0]::function[4]::v128.any_true: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pxor %xmm7, %xmm7 | ||
;; pcmpeqb %xmm7, %xmm0 | ||
;; pmovmskb %xmm0, %ecx | ||
;; cmpl $0xffff, %ecx | ||
;; jne 0x126 | ||
;; 11c: movl $0xc8, %eax | ||
;; jmp 0x12b | ||
;; 126: movl $0x64, %eax | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
;;! target = "x86_64" | ||
;;! test = "compile" | ||
|
||
;; Two functions that each return a `v128.const`, one with all bits clear and
;; one with all bits set.
(module | ||
;; Returns a v128 constant whose two i64 lanes are both 0.
(func $zero (result v128) v128.const i64x2 0 0) | ||
;; Returns a v128 constant whose two i64 lanes are both -1 (every bit set).
(func $ones (result v128) v128.const i64x2 -1 -1) | ||
) | ||
;; wasm[0]::function[0]::zero: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pxor %xmm0, %xmm0 | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq | ||
;; | ||
;; wasm[0]::function[1]::ones: | ||
;; pushq %rbp | ||
;; movq %rsp, %rbp | ||
;; pcmpeqd %xmm0, %xmm0 | ||
;; movq %rbp, %rsp | ||
;; popq %rbp | ||
;; retq |