From b4aa663219eb60ff2d7b8f43eaf48d83e92e78bd Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 15 Aug 2022 23:06:50 -0700 Subject: [PATCH 1/4] Add a test for iadd_pairwise with swiden input --- .../filetests/isa/x64/simd-pairwise-add.clif | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif diff --git a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif new file mode 100644 index 000000000000..c3f46a422ec2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif @@ -0,0 +1,39 @@ +test compile precise-output +target x86_64 + +function %fn1(i8x16) -> i16x8 { +block0(v0: i8x16): + v1 = swiden_low v0 + v2 = swiden_high v0 + v3 = iadd_pairwise v1, v2 + return v3 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; load_const VCodeConstant(0), %xmm0 +; movdqa %xmm5, %xmm7 +; pmaddubsw %xmm0, %xmm7, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %fn2(i16x8) -> i32x4 { +block0(v0: i16x8): + v1 = swiden_low v0 + v2 = swiden_high v0 + v3 = iadd_pairwise v1, v2 + return v3 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm4 +; pmaddwd %xmm0, %xmm4, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + From 46b89d62829b21f5015c33a59bf6a7731582562c Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 15 Aug 2022 23:17:17 -0700 Subject: [PATCH 2/4] Implement iadd_pairwise for swiden_{low,high} input --- cranelift/codegen/src/isa/x64/inst.isle | 12 ++++ cranelift/codegen/src/isa/x64/lower.isle | 16 ++++++ cranelift/codegen/src/isa/x64/lower.rs | 56 +------------------ cranelift/codegen/src/isa/x64/lower/isle.rs | 18 ++++++ .../filetests/isa/x64/simd-pairwise-add.clif | 4 +- 5 files changed, 50 insertions(+), 56 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 8043a1059bd9..88f872ae9b5b 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -2582,6 +2582,10 @@ dst)))) dst)) +(decl x64_pmaddubsw (Xmm XmmMem) Xmm) +(rule (x64_pmaddubsw src1 src2) + (xmm_rm_r $I8X16 (SseOpcode.Pmaddubsw) src1 src2)) + ;; Helper for creating `insertps` instructions. (decl x64_insertps (Xmm XmmMem u8) Xmm) (rule (x64_insertps src1 src2 lane) @@ -3255,6 +3259,14 @@ (ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets))))) +;;;; iadd_pairwise constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl iadd_pairwise_mul_const_16 () VCodeConstant) +(extern constructor iadd_pairwise_mul_const_16 iadd_pairwise_mul_const_16) + +(decl iadd_pairwise_mul_const_32 () VCodeConstant) +(extern constructor iadd_pairwise_mul_const_32 iadd_pairwise_mul_const_32) + ;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC)))) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 759a956ec6f9..7a271dd9ba4e 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3189,3 +3189,19 @@ ;; Add this second set of converted lanes to the original to properly handle ;; values greater than max signed int. (x64_paddd tmp1 dst))) + +;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower + (has_type $I16X8 (iadd_pairwise + (swiden_low val @ (value_type $I8X16)) + (swiden_high val)))) + (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16)))) + (x64_pmaddubsw mul_const val))) + +(rule (lower + (has_type $I32X4 (iadd_pairwise + (swiden_low val @ (value_type $I16X8)) + (swiden_high val)))) + (let ((mul_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32)))) + (x64_pmaddwd val mul_const))) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 3e4decfda905..bcae73eaae5a 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -566,63 +566,11 @@ fn lower_insn_to_regs( } Opcode::IaddPairwise => { - if let (Some(swiden_low), Some(swiden_high)) = ( + if let (Some(_swiden_low), Some(_swiden_high)) = ( matches_input(ctx, inputs[0], Opcode::SwidenLow), matches_input(ctx, inputs[1], Opcode::SwidenHigh), ) { - let swiden_input = &[ - InsnInput { - insn: swiden_low, - input: 0, - }, - InsnInput { - insn: swiden_high, - input: 0, - }, - ]; - - let input_ty = ctx.input_ty(swiden_low, 0); - let output_ty = ctx.output_ty(insn, 0); - let src0 = put_input_in_reg(ctx, swiden_input[0]); - let src1 = put_input_in_reg(ctx, swiden_input[1]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - if src0 != src1 { - unimplemented!( - "iadd_pairwise not implemented for general case with different inputs" - ); - } - match (input_ty, output_ty) { - (types::I8X16, types::I16X8) => { - static MUL_CONST: [u8; 16] = [0x01; 16]; - let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); - let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16)); - ctx.emit(Inst::xmm_mov( - SseOpcode::Movdqa, - RegMem::reg(mul_const_reg.to_reg()), - dst, - )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst)); - } - (types::I16X8, types::I32X4) => { - static MUL_CONST: [u8; 16] = [ - 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, - 0x01, 0x00, 0x01, 0x00, - ]; - let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); - let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8)); - ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst)); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pmaddwd, - RegMem::reg(mul_const_reg.to_reg()), - dst, - )); - } - _ => { - unimplemented!("Type not supported for {:?}", op); - } - } + implemented_in_isle(ctx); } else if let (Some(uwiden_low), Some(uwiden_high)) = ( matches_input(ctx, inputs[0], Opcode::UwidenLow), matches_input(ctx, inputs[1], Opcode::UwidenHigh), diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index e7fa6e9437d9..0f07a9a3985c 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -781,6 +781,18 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { self.lower_ctx .use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH)) } + + #[inline] + fn iadd_pairwise_mul_const_16(&mut self) -> VCodeConstant { + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_16)) + } + + #[inline] + fn iadd_pairwise_mul_const_32(&mut self) -> VCodeConstant { + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_32)) + } } impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { @@ -907,3 +919,9 @@ const UINT_MASK: [u8; 16] = [ const UINT_MASK_HIGH: [u8; 16] = [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, ]; + +const IADD_PAIRWISE_MUL_CONST_16: [u8; 16] = [0x01; 16]; + +const IADD_PAIRWISE_MUL_CONST_32: [u8; 16] = [ + 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, +]; diff --git a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif index c3f46a422ec2..6c8fa82c373d 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif @@ -14,8 +14,8 @@ block0(v0: i8x16): ; block0: ; movdqa %xmm0, %xmm5 ; load_const VCodeConstant(0), %xmm0 -; movdqa %xmm5, %xmm7 -; pmaddubsw %xmm0, %xmm7, %xmm0 +; movdqa %xmm5, %xmm6 +; pmaddubsw %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret From 3312e6af05ae9e85bb18c54bd398e08911b7771b Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 15 Aug 2022 23:18:46 -0700 Subject: [PATCH 3/4] Add a test case for iadd_pairwise with uwiden input --- .../filetests/isa/x64/simd-pairwise-add.clif | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif index 6c8fa82c373d..0eff854be6d6 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif @@ -28,11 +28,49 @@ block0(v0: i16x8): return v3 } +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm3 +; pmaddwd %xmm0, %xmm3, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %fn3(i8x16) -> i16x8 { +block0(v0: i8x16): + v1 = uwiden_low v0 + v2 = uwiden_high v0 + v3 = iadd_pairwise v1, v2 + return v3 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm4 +; pmaddubsw %xmm0, %xmm4, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret + +function %fn4(i16x8) -> i32x4 { +block0(v0: i16x8): + v1 = uwiden_low v0 + v2 = uwiden_high v0 + v3 = iadd_pairwise v1, v2 + return v3 +} + ; pushq %rbp ; movq %rsp, %rbp ; block0: ; load_const VCodeConstant(0), %xmm4 -; pmaddwd %xmm0, %xmm4, %xmm0 +; pxor %xmm0, %xmm4, %xmm0 +; load_const VCodeConstant(1), %xmm8 +; pmaddwd %xmm0, %xmm8, %xmm0 +; load_const VCodeConstant(2), %xmm11 +; paddd %xmm0, %xmm11, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret From 7987c35efd531e2d5e4d2cfbe1d201461f4ef8dc Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 15 Aug 2022 23:48:10 -0700 Subject: [PATCH 4/4] Implement iadd_pairwise with uwiden --- cranelift/codegen/src/isa/x64/inst.isle | 6 + cranelift/codegen/src/isa/x64/lower.isle | 21 ++++ cranelift/codegen/src/isa/x64/lower.rs | 110 +----------------- cranelift/codegen/src/isa/x64/lower/isle.rs | 20 ++++ .../filetests/isa/x64/simd-pairwise-add.clif | 12 +- 5 files changed, 55 insertions(+), 114 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 88f872ae9b5b..e0ea0a6c7b80 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -3267,6 +3267,12 @@ (decl iadd_pairwise_mul_const_32 () VCodeConstant) (extern constructor iadd_pairwise_mul_const_32 iadd_pairwise_mul_const_32) +(decl iadd_pairwise_xor_const_32 () VCodeConstant) +(extern constructor iadd_pairwise_xor_const_32 iadd_pairwise_xor_const_32) + +(decl iadd_pairwise_addd_const_32 () VCodeConstant) +(extern constructor iadd_pairwise_addd_const_32 iadd_pairwise_addd_const_32) + ;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC)))) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 7a271dd9ba4e..53f07b98e09d 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3205,3 +3205,24 @@ (swiden_high val)))) (let ((mul_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32)))) (x64_pmaddwd val mul_const))) + +(rule (lower + (has_type $I16X8 (iadd_pairwise + (uwiden_low val @ (value_type $I8X16)) + (uwiden_high val)))) + (let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16)))) + (x64_pmaddubsw val mul_const))) + +(rule (lower + (has_type $I32X4 (iadd_pairwise + (uwiden_low val @ (value_type $I16X8)) + (uwiden_high val)))) + (let ((xor_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_xor_const_32))) + (dst Xmm (x64_pxor val xor_const)) + + (madd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32))) + (dst Xmm (x64_pmaddwd dst madd_const)) + + (addd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_addd_const_32)))) + (x64_paddd dst addd_const))) + diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index bcae73eaae5a..1a31db7545e5 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -561,117 +561,11 @@ fn lower_insn_to_regs( | Opcode::FcvtToUint | Opcode::FcvtToSint | Opcode::FcvtToUintSat - | Opcode::FcvtToSintSat => { + | Opcode::FcvtToSintSat + | Opcode::IaddPairwise => { implemented_in_isle(ctx); } - Opcode::IaddPairwise => { - if let (Some(_swiden_low), Some(_swiden_high)) = ( - matches_input(ctx, inputs[0], Opcode::SwidenLow), - matches_input(ctx, inputs[1], Opcode::SwidenHigh), - ) { - implemented_in_isle(ctx); - } else if let (Some(uwiden_low), Some(uwiden_high)) = ( - matches_input(ctx, inputs[0], Opcode::UwidenLow), - matches_input(ctx, inputs[1], Opcode::UwidenHigh), - ) { - let uwiden_input = &[ - InsnInput { - insn: uwiden_low, - input: 0, - }, - InsnInput { - insn: uwiden_high, - input: 0, - }, - ]; - - let input_ty = ctx.input_ty(uwiden_low, 0); - let output_ty = ctx.output_ty(insn, 0); - let src0 = put_input_in_reg(ctx, uwiden_input[0]); - let src1 = put_input_in_reg(ctx, uwiden_input[1]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - if src0 != src1 { - unimplemented!( - "iadd_pairwise not implemented for general case with different inputs" - ); - } - match (input_ty, output_ty) { - (types::I8X16, types::I16X8) => { - static MUL_CONST: [u8; 16] = [0x01; 16]; - let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); - let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16)); - ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst)); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pmaddubsw, - RegMem::reg(mul_const_reg.to_reg()), - dst, - )); - } - (types::I16X8, types::I32X4) => { - static PXOR_CONST: [u8; 16] = [ - 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, - 0x00, 0x80, 0x00, 0x80, - ]; - let pxor_const = - ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST)); - let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const( - pxor_const, - pxor_const_reg, - types::I16X8, - )); - ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst)); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pxor, - RegMem::reg(pxor_const_reg.to_reg()), - dst, - )); - - static MADD_CONST: [u8; 16] = [ - 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, - 0x01, 0x00, 0x01, 0x00, - ]; - let madd_const = - ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST)); - let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const( - madd_const, - madd_const_reg, - types::I16X8, - )); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pmaddwd, - RegMem::reg(madd_const_reg.to_reg()), - dst, - )); - static ADDD_CONST2: [u8; 16] = [ - 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x01, 0x00, - ]; - let addd_const2 = - ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2)); - let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const( - addd_const2, - addd_const2_reg, - types::I16X8, - )); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Paddd, - RegMem::reg(addd_const2_reg.to_reg()), - dst, - )); - } - _ => { - unimplemented!("Type not supported for {:?}", op); - } - } - } else { - unimplemented!("Operands not supported for {:?}", op); - } - } Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => { let input_ty = ctx.input_ty(insn, 0); let output_ty = ctx.output_ty(insn, 0); diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 0f07a9a3985c..4c623a3a9a62 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -793,6 +793,18 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { self.lower_ctx .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_32)) } + + #[inline] + fn iadd_pairwise_xor_const_32(&mut self) -> VCodeConstant { + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_XOR_CONST_32)) + } + + #[inline] + fn iadd_pairwise_addd_const_32(&mut self) -> VCodeConstant { + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_ADDD_CONST_32)) + } } impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { @@ -925,3 +937,11 @@ const IADD_PAIRWISE_MUL_CONST_16: [u8; 16] = [0x01; 16]; const IADD_PAIRWISE_MUL_CONST_32: [u8; 16] = [ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, ]; + +const IADD_PAIRWISE_XOR_CONST_32: [u8; 16] = [ + 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, +]; + +const IADD_PAIRWISE_ADDD_CONST_32: [u8; 16] = [ + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, +]; diff --git a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif index 0eff854be6d6..dfb6c763c2aa 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-pairwise-add.clif @@ -48,8 +48,8 @@ block0(v0: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_const VCodeConstant(0), %xmm4 -; pmaddubsw %xmm0, %xmm4, %xmm0 +; load_const VCodeConstant(0), %xmm3 +; pmaddubsw %xmm0, %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -65,10 +65,10 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_const VCodeConstant(0), %xmm4 -; pxor %xmm0, %xmm4, %xmm0 -; load_const VCodeConstant(1), %xmm8 -; pmaddwd %xmm0, %xmm8, %xmm0 +; load_const VCodeConstant(0), %xmm3 +; pxor %xmm0, %xmm3, %xmm0 +; load_const VCodeConstant(1), %xmm7 +; pmaddwd %xmm0, %xmm7, %xmm0 ; load_const VCodeConstant(2), %xmm11 ; paddd %xmm0, %xmm11, %xmm0 ; movq %rbp, %rsp