Skip to content

Commit

Permalink
PCC: x64: insertlane instructions read only scalar-sized values.
Browse files Browse the repository at this point in the history
  • Loading branch information
cfallin committed Mar 21, 2024
1 parent c6d923a commit 583a0a3
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 0 deletions.
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,26 @@ impl SseOpcode {
_ => 8,
}
}

/// Reports whether the `src2` operand of this opcode is a scalar, as it is
/// for lane insertions.
///
/// Returns `true` for the `pinsr{b,w,d}` opcodes and for every `pmovsx*` /
/// `pmovzx*` opcode listed below; every other opcode yields `false`.
pub(crate) fn has_scalar_src2(self) -> bool {
    matches!(
        self,
        // Lane insertions.
        SseOpcode::Pinsrb
            | SseOpcode::Pinsrw
            | SseOpcode::Pinsrd
            // `pmovsx*` family.
            | SseOpcode::Pmovsxbw
            | SseOpcode::Pmovsxbd
            | SseOpcode::Pmovsxbq
            | SseOpcode::Pmovsxwd
            | SseOpcode::Pmovsxwq
            | SseOpcode::Pmovsxdq
            // `pmovzx*` family.
            | SseOpcode::Pmovzxbw
            | SseOpcode::Pmovzxbd
            | SseOpcode::Pmovzxbq
            | SseOpcode::Pmovzxwd
            | SseOpcode::Pmovzxwq
            | SseOpcode::Pmovzxdq
    )
}
}

impl fmt::Debug for SseOpcode {
Expand Down
27 changes: 27 additions & 0 deletions cranelift/codegen/src/isa/x64/pcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,10 @@ pub(crate) fn check(
let (ty, size) = match op {
AvxOpcode::Vmovss => (F32, 32),
AvxOpcode::Vmovsd => (F64, 64),
AvxOpcode::Vpinsrb => (I8, 8),
AvxOpcode::Vpinsrw => (I16, 16),
AvxOpcode::Vpinsrd => (I32, 32),
AvxOpcode::Vpinsrq => (I64, 64),

// We assume all other operations happen on 128-bit values.
_ => (I8X16, 128),
Expand Down Expand Up @@ -767,6 +771,29 @@ pub(crate) fn check(
RegMem::Reg { .. } => Ok(()),
},

// `XmmRmRImm` instructions whose opcode reports a scalar `src2` (see
// `SseOpcode::has_scalar_src2`). This guarded arm has to come before the
// unguarded `Inst::XmmRmRImm` arm that follows it, which would otherwise
// match these instructions as well.
Inst::XmmRmRImm {
dst,
ref src2,
size,
op,
..
} if op.has_scalar_src2() => {
match <&RegMem>::from(src2) {
RegMem::Mem { ref addr } => {
// A memory `src2` is validated as a load whose type and bit-width
// are both derived from the instruction's `size` field.
// NOTE(review): the width comes from `size`, not from `op`; confirm
// that lowering sets `size` to the inserted lane's width for the
// narrow insertions (e.g. `pinsrb` / `pinsrw`).
// NOTE(review): `None` is passed where a destination could be given
// -- presumably so that no fact is attached to the loaded value;
// confirm against `check_load`.
check_load(
ctx,
None,
addr,
vcode,
size.to_type(),
size.to_bits().into(),
)?;
}
// A register `src2` involves no memory access, so nothing is checked.
RegMem::Reg { .. } => {}
}
// The arm's result is whatever `ensure_no_fact` returns for `dst`
// (presumably an error if `dst` carries a fact -- confirm).
ensure_no_fact(vcode, dst.to_reg())
}

Inst::XmmRmRImm { dst, ref src2, .. } => {
match <&RegMem>::from(src2) {
RegMem::Mem { ref addr } => {
Expand Down
101 changes: 101 additions & 0 deletions tests/disas/pcc-insertlane-x64-avx.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
;;! target = "x86_64"
;;! test = "compile"
;;! flags = [ "-Oopt-level=0", "-Cpcc=y", "-Ccranelift-has-sse41=true", "-Ccranelift-has-avx=true" ]

;; Four functions, one per `v128.loadN_lane` width (8, 16, 32, 64 bits).
;; Each takes an i32 address, pushes the same constant vector, and replaces
;; lane 1 of that vector with a value loaded from memory at the address.
;; In the expected output below, each load is folded into the memory operand
;; of a `vpinsr{b,w,d,q}` instruction.
(module
;; A single linear memory with a minimum and maximum size of one page.
(memory 1 1)
;; 8-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load8_lane align=1 1)
;; 16-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load16_lane align=1 1)
;; 32-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load32_lane align=1 1)
;; 64-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load64_lane align=1 1))
;; function u0:0:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; vmovdqu const(0), %xmm7
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; vpinsrb $1, %xmm7, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
;;
;; function u0:1:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; vmovdqu const(0), %xmm7
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; vpinsrw $1, %xmm7, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
;;
;; function u0:2:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; vmovdqu const(0), %xmm7
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; vpinsrd $1, %xmm7, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
;;
;; function u0:3:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; vmovdqu const(0), %xmm7
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; vpinsrq $1, %xmm7, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
101 changes: 101 additions & 0 deletions tests/disas/pcc-insertlane-x64.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
;;! target = "x86_64"
;;! test = "compile"
;;! flags = [ "-Oopt-level=0", "-Cpcc=y", "-Ccranelift-has-sse41=true", "-Ccranelift-has-avx=false" ]

;; Four functions, one per `v128.loadN_lane` width (8, 16, 32, 64 bits).
;; Each takes an i32 address, pushes the same constant vector, and replaces
;; lane 1 of that vector with a value loaded from memory at the address.
;; In the expected output below, each load is folded into the memory operand
;; of a `pinsr*` instruction; the 64-bit case is printed as `pinsrd.w`.
(module
;; A single linear memory with a minimum and maximum size of one page.
(memory 1 1)
;; 8-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load8_lane align=1 1)
;; 16-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load16_lane align=1 1)
;; 32-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load32_lane align=1 1)
;; 64-bit load into lane 1.
(func (param i32) (result v128)
local.get 0
v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263
v128.load64_lane align=1 1))
;; function u0:0:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; movdqu const(0), %xmm0
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; pinsrb $1, %xmm0, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
;;
;; function u0:1:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; movdqu const(0), %xmm0
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; pinsrw $1, %xmm0, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
;;
;; function u0:2:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; movdqu const(0), %xmm0
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; pinsrd $1, %xmm0, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret
;;
;; function u0:3:
;; pushq %rbp
;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
;; movq %rsp, %rbp
;; movq 8(%rdi), %r10
;; movq 0(%r10), %r10
;; cmpq %rsp, %r10
;; jnbe #trap=stk_ovf
;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
;; block0:
;; movdqu const(0), %xmm0
;; movl %edx, %r10d
;; movq 80(%rdi), %r11
;; pinsrd.w $1, %xmm0, 0(%r11,%r10,1), %xmm0
;; jmp label1
;; block1:
;; movq %rbp, %rsp
;; popq %rbp
;; ret

0 comments on commit 583a0a3

Please sign in to comment.