From a79cf76fe0995e8798db606495145ab80cb5cd4e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 21 Mar 2024 12:00:34 -0700 Subject: [PATCH] PCC: x64: insertlane instructions read only scalar-sized values. (#8207) * PCC: x64: insertlane instructions read only scalar-sized values. Also fix `clamp_range` on greater-than-64-bit values: no range fact is possible in this case (propagate `Option` a bit deeper to represent this). Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=67538. * Rebase to latest main with leaf-function changes and update test expectations. --- cranelift/codegen/src/isa/aarch64/pcc.rs | 4 +- cranelift/codegen/src/isa/x64/inst/args.rs | 20 ++ cranelift/codegen/src/isa/x64/pcc.rs | 35 ++- cranelift/codegen/src/machinst/pcc.rs | 30 ++- tests/disas/pcc-insertlane-x64-avx.wat | 272 ++++++++++++++++++++ tests/disas/pcc-insertlane-x64.wat | 273 +++++++++++++++++++++ 6 files changed, 618 insertions(+), 16 deletions(-) create mode 100644 tests/disas/pcc-insertlane-x64-avx.wat create mode 100644 tests/disas/pcc-insertlane-x64.wat diff --git a/cranelift/codegen/src/isa/aarch64/pcc.rs b/cranelift/codegen/src/isa/aarch64/pcc.rs index 55f79599ea3b..e17daffd477d 100644 --- a/cranelift/codegen/src/isa/aarch64/pcc.rs +++ b/cranelift/codegen/src/isa/aarch64/pcc.rs @@ -303,7 +303,7 @@ pub(crate) fn check( check_constant(ctx, vcode, rd, 64, constant) } else { check_output(ctx, vcode, rd, &[], |_vcode| { - Ok(Fact::max_range_for_width(64)) + Ok(Some(Fact::max_range_for_width(64))) }) } } @@ -426,7 +426,7 @@ fn check_addr<'a>( trace!( "checking a load: loaded_fact = {loaded_fact:?} result_fact = {result_fact:?}" ); - if ctx.subsumes_fact_optionals(Some(&loaded_fact), result_fact) { + if ctx.subsumes_fact_optionals(loaded_fact.as_ref(), result_fact) { Ok(()) } else { Err(PccError::UnsupportedFact) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 45ab8e6e8a5b..d648952715dd 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1371,6 +1371,26 @@ impl SseOpcode { _ => 8, } } + + /// Is `src2` with this opcode a scalar, as for lane insertions? + pub(crate) fn has_scalar_src2(self) -> bool { + match self { + SseOpcode::Pinsrb | SseOpcode::Pinsrw | SseOpcode::Pinsrd => true, + SseOpcode::Pmovsxbw + | SseOpcode::Pmovsxbd + | SseOpcode::Pmovsxbq + | SseOpcode::Pmovsxwd + | SseOpcode::Pmovsxwq + | SseOpcode::Pmovsxdq => true, + SseOpcode::Pmovzxbw + | SseOpcode::Pmovzxbd + | SseOpcode::Pmovzxbq + | SseOpcode::Pmovzxwd + | SseOpcode::Pmovzxwq + | SseOpcode::Pmovzxdq => true, + _ => false, + } + } } impl fmt::Debug for SseOpcode { diff --git a/cranelift/codegen/src/isa/x64/pcc.rs b/cranelift/codegen/src/isa/x64/pcc.rs index 64a8acb7d356..2942c5dc1b16 100644 --- a/cranelift/codegen/src/isa/x64/pcc.rs +++ b/cranelift/codegen/src/isa/x64/pcc.rs @@ -188,7 +188,7 @@ pub(crate) fn check( dst, .. } => check_output(ctx, vcode, dst.to_writable_reg(), &[], |_vcode| { - Ok(Fact::constant(64, 0)) + Ok(Some(Fact::constant(64, 0))) }), Inst::AluConstOp { dst, .. } => undefined_result(ctx, vcode, dst, 64, 64), @@ -319,7 +319,7 @@ pub(crate) fn check( Inst::Imm { simm64, dst, .. } => { check_output(ctx, vcode, dst.to_writable_reg(), &[], |_vcode| { - Ok(Fact::constant(64, simm64)) + Ok(Some(Fact::constant(64, simm64))) }) } @@ -629,6 +629,10 @@ pub(crate) fn check( let (ty, size) = match op { AvxOpcode::Vmovss => (F32, 32), AvxOpcode::Vmovsd => (F64, 64), + AvxOpcode::Vpinsrb => (I8, 8), + AvxOpcode::Vpinsrw => (I16, 16), + AvxOpcode::Vpinsrd => (I32, 32), + AvxOpcode::Vpinsrq => (I64, 64), // We assume all other operations happen on 128-bit values. _ => (I8X16, 128), @@ -767,6 +771,29 @@ pub(crate) fn check( RegMem::Reg { .. } => Ok(()), }, + Inst::XmmRmRImm { + dst, + ref src2, + size, + op, + .. + } if op.has_scalar_src2() => { + match <&RegMem>::from(src2) { + RegMem::Mem { ref addr } => { + check_load( + ctx, + None, + addr, + vcode, + size.to_type(), + size.to_bits().into(), + )?; + } + RegMem::Reg { .. } => {} + } + ensure_no_fact(vcode, dst.to_reg()) + } + Inst::XmmRmRImm { dst, ref src2, .. } => { match <&RegMem>::from(src2) { RegMem::Mem { ref addr } => { @@ -917,8 +944,8 @@ fn check_mem<'a>( loaded_fact, result_fact ); - if ctx.subsumes_fact_optionals(Some(&loaded_fact), result_fact) { - Ok(Some(loaded_fact.clone())) + if ctx.subsumes_fact_optionals(loaded_fact.as_ref(), result_fact) { + Ok(loaded_fact.clone()) } else { Err(PccError::UnsupportedFact) } diff --git a/cranelift/codegen/src/machinst/pcc.rs b/cranelift/codegen/src/machinst/pcc.rs index d737d83dacc3..a88fcc4d7cee 100644 --- a/cranelift/codegen/src/machinst/pcc.rs +++ b/cranelift/codegen/src/machinst/pcc.rs @@ -28,8 +28,10 @@ pub(crate) fn clamp_range( to_bits: u16, from_bits: u16, fact: Option, -) -> PccResult { - let max = if from_bits == 64 { +) -> PccResult> { + let max = if from_bits > 64 { + return Ok(None); + } else if from_bits == 64 { u64::MAX } else { (1u64 << from_bits) - 1 @@ -42,32 +44,40 @@ pub(crate) fn clamp_range( ); Ok(fact .and_then(|f| ctx.uextend(&f, from_bits, to_bits)) - .unwrap_or_else(|| { + .or_else(|| { let result = Fact::Range { bit_width: to_bits, min: 0, max, }; trace!(" -> clamping to {:?}", result); - result + Some(result) })) } pub(crate) fn check_subsumes(ctx: &FactContext, subsumer: &Fact, subsumee: &Fact) -> PccResult<()> { + check_subsumes_optionals(ctx, Some(subsumer), Some(subsumee)) +} + +pub(crate) fn check_subsumes_optionals( + ctx: &FactContext, + subsumer: Option<&Fact>, + subsumee: Option<&Fact>, +) -> PccResult<()> { trace!( "checking if derived fact {:?} subsumes stated fact {:?}", subsumer, subsumee ); - if ctx.subsumes(subsumer, subsumee) { + if ctx.subsumes_fact_optionals(subsumer, subsumee) { Ok(()) } else { Err(PccError::UnsupportedFact) } } -pub(crate) fn check_output) -> PccResult>( +pub(crate) fn check_output) -> PccResult>>( ctx: &FactContext, vcode: &mut VCode, out: Writable, @@ -76,14 +86,14 @@ pub(crate) fn check_output) -> PccResult ) -> PccResult<()> { if let Some(fact) = vcode.vreg_fact(out.to_reg().into()) { let result = f(vcode)?; - check_subsumes(ctx, &result, fact) + check_subsumes_optionals(ctx, result.as_ref(), Some(fact)) } else if ins.iter().any(|r| { vcode .vreg_fact(r.into()) .map(|fact| fact.propagates()) .unwrap_or(false) }) { - if let Ok(fact) = f(vcode) { + if let Ok(Some(fact)) = f(vcode) { trace!("setting vreg {:?} to {:?}", out, fact); vcode.set_vreg_fact(out.to_reg().into(), fact); } @@ -93,7 +103,7 @@ pub(crate) fn check_output) -> PccResult } } -pub(crate) fn check_unop PccResult>( +pub(crate) fn check_unop PccResult>>( ctx: &FactContext, vcode: &mut VCode, reg_width: u16, @@ -107,7 +117,7 @@ pub(crate) fn check_unop PccResult>( }) } -pub(crate) fn check_binop PccResult>( +pub(crate) fn check_binop PccResult>>( ctx: &FactContext, vcode: &mut VCode, reg_width: u16, diff --git a/tests/disas/pcc-insertlane-x64-avx.wat b/tests/disas/pcc-insertlane-x64-avx.wat new file mode 100644 index 000000000000..63a02e0dcef7 --- /dev/null +++ b/tests/disas/pcc-insertlane-x64-avx.wat @@ -0,0 +1,272 @@ +;;! target = "x86_64" +;;! test = "compile" +;;! flags = [ "-Oopt-level=0", "-Cpcc=y", "-Ccranelift-has-sse41=true", "-Ccranelift-has-avx=true" ] + +(module + (memory 1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load8_lane align=1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load16_lane align=1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load32_lane align=1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load64_lane align=1 1) + (func (param v128 i32) (result v128) + local.get 0 + local.get 1 + f32.load + f32x4.replace_lane 0) + (func (param v128 i32) (result v128) + local.get 0 + local.get 1 + f64.load + f64x2.replace_lane 1) + (func (param v128 i32) (result v128) + local.get 0 + local.get 1 + f64.load + f64x2.replace_lane 0) + (func (param v128 i32) + local.get 1 + local.get 0 + f64x2.extract_lane 1 + f64.store) + (func (param v128 i32) + local.get 1 + local.get 0 + f32x4.extract_lane 1 + f32.store) + (func (param v128 i32) + local.get 1 + local.get 0 + i8x16.extract_lane_s 1 + i32.store8) + (func (param v128 i32) + local.get 1 + local.get 0 + i16x8.extract_lane_s 1 + i32.store16) + (func (param v128 i32) + local.get 1 + local.get 0 + i32x4.extract_lane 1 + i32.store) + (func (param v128 i32) + local.get 1 + local.get 0 + i64x2.extract_lane 1 + i64.store)) +;; function u0:0: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vmovdqu const(0), %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vpinsrb $1, %xmm7, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:1: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vmovdqu const(0), %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vpinsrw $1, %xmm7, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:2: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vmovdqu const(0), %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vpinsrd $1, %xmm7, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:3: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vmovdqu const(0), %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vpinsrq $1, %xmm7, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:4: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vinsertps $0, %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:5: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vmovlhps %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:6: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r11d +;; movq 80(%rdi), %rsi +;; vmovsd 0(%rsi,%r11,1), %xmm1 +;; vmovsd %xmm0, %xmm1, %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:7: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vpshufd $238, %xmm0, %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vmovsd %xmm7, 0(%r11,%r10,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:8: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vpshufd $1, %xmm0, %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; vmovss %xmm7, 0(%r11,%r10,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:9: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vpextrb $1, %xmm0, %r11 +;; movsbl %r11b, %r11d +;; movl %edx, %esi +;; movq 80(%rdi), %rdi +;; movb %r11b, 0(%rdi,%rsi,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:10: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; vpextrw $1, %xmm0, %r11 +;; movswl %r11w, %r11d +;; movl %edx, %esi +;; movq 80(%rdi), %rdi +;; movw %r11w, 0(%rdi,%rsi,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:11: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r9d +;; movq 80(%rdi), %r10 +;; vpextrd $1, %xmm0, 0(%r10,%r9,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:12: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r9d +;; movq 80(%rdi), %r10 +;; vpextrq $1, %xmm0, 0(%r10,%r9,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret diff --git a/tests/disas/pcc-insertlane-x64.wat b/tests/disas/pcc-insertlane-x64.wat new file mode 100644 index 000000000000..48ea72a207b7 --- /dev/null +++ b/tests/disas/pcc-insertlane-x64.wat @@ -0,0 +1,273 @@ +;;! target = "x86_64" +;;! test = "compile" +;;! flags = [ "-Oopt-level=0", "-Cpcc=y", "-Ccranelift-has-sse41=true", "-Ccranelift-has-avx=false" ] + +(module + (memory 1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load8_lane align=1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load16_lane align=1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load32_lane align=1 1) + (func (param i32) (result v128) + local.get 0 + v128.const i32x4 0x29292928 0x206e6928 0x616d286d 0x206f7263 + v128.load64_lane align=1 1) + (func (param v128 i32) (result v128) + local.get 0 + local.get 1 + f32.load + f32x4.replace_lane 0) + (func (param v128 i32) (result v128) + local.get 0 + local.get 1 + f64.load + f64x2.replace_lane 1) + (func (param v128 i32) (result v128) + local.get 0 + local.get 1 + f64.load + f64x2.replace_lane 0) + (func (param v128 i32) + local.get 1 + local.get 0 + f64x2.extract_lane 1 + f64.store) + (func (param v128 i32) + local.get 1 + local.get 0 + f32x4.extract_lane 1 + f32.store) + (func (param v128 i32) + local.get 1 + local.get 0 + i8x16.extract_lane_s 1 + i32.store8) + (func (param v128 i32) + local.get 1 + local.get 0 + i16x8.extract_lane_s 1 + i32.store16) + (func (param v128 i32) + local.get 1 + local.get 0 + i32x4.extract_lane 1 + i32.store) + (func (param v128 i32) + local.get 1 + local.get 0 + i64x2.extract_lane 1 + i64.store)) +;; function u0:0: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movdqu const(0), %xmm0 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; pinsrb $1, %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:1: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movdqu const(0), %xmm0 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; pinsrw $1, %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:2: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movdqu const(0), %xmm0 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; pinsrd $1, %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:3: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movdqu const(0), %xmm0 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; pinsrd.w $1, %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:4: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; insertps $0, %xmm0, 0(%r11,%r10,1), %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:5: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r11d +;; movq 80(%rdi), %rsi +;; movdqu 0(%rsi,%r11,1), %xmm7 +;; movlhps %xmm0, %xmm7, %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:6: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r11d +;; movq 80(%rdi), %rsi +;; movsd 0(%rsi,%r11,1), %xmm1 +;; movsd %xmm0, %xmm1, %xmm0 +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:7: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; pshufd $238, %xmm0, %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; movsd %xmm7, 0(%r11,%r10,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:8: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; pshufd $1, %xmm0, %xmm7 +;; movl %edx, %r10d +;; movq 80(%rdi), %r11 +;; movss %xmm7, 0(%r11,%r10,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:9: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; pextrb $1, %xmm0, %r11 +;; movsbl %r11b, %r11d +;; movl %edx, %esi +;; movq 80(%rdi), %rdi +;; movb %r11b, 0(%rdi,%rsi,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:10: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; pextrw $1, %xmm0, %r11 +;; movswl %r11w, %r11d +;; movl %edx, %esi +;; movq 80(%rdi), %rdi +;; movw %r11w, 0(%rdi,%rsi,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:11: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r9d +;; movq 80(%rdi), %r10 +;; pextrd $1, %xmm0, 0(%r10,%r9,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:12: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; movl %edx, %r9d +;; movq 80(%rdi), %r10 +;; pextrq $1, %xmm0, 0(%r10,%r9,1) +;; jmp label1 +;; block1: +;; movq %rbp, %rsp +;; popq %rbp +;; ret