From 4e34dd82399e8eeaa3842519437731742c0a5152 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 13 Jan 2022 13:00:29 -0800 Subject: [PATCH] cranelift: Port `ushr` SIMD lowerings to ISLE on x64 --- cranelift/codegen/src/isa/x64/inst.isle | 5 + cranelift/codegen/src/isa/x64/lower.isle | 73 +++- cranelift/codegen/src/isa/x64/lower.rs | 167 +--------- cranelift/codegen/src/isa/x64/lower/isle.rs | 37 ++- .../x64/lower/isle/generated_code.manifest | 4 +- .../src/isa/x64/lower/isle/generated_code.rs | 311 ++++++++++++------ 6 files changed, 315 insertions(+), 282 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index d7ad48e2db1f..3a6f4089f54d 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1305,6 +1305,11 @@ (rule (psllq src1 src2) (xmm_rmi_reg (SseOpcode.Psllq) src1 src2)) +;; Helper for creating `psrlw` instructions. +(decl psrlw (Reg RegMemImm) Reg) +(rule (psrlw src1 src2) + (xmm_rmi_reg (SseOpcode.Psrlw) src1 src2)) + ;; Helper for creating `psrld` instructions. (decl psrld (Reg RegMemImm) Reg) (rule (psrld src1 src2) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 2ea073684528..04513a1c5d13 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -595,13 +595,17 @@ ;; When the shift amount is known, we can statically (i.e. at compile time) ;; determine the mask to use and only emit that. +(decl ishl_i8x16_mask_for_const (u32) SyntheticAmode) +(extern constructor ishl_i8x16_mask_for_const ishl_i8x16_mask_for_const) (rule (ishl_i8x16_mask (RegMemImm.Imm amt)) (ishl_i8x16_mask_for_const amt)) ;; Otherwise, we must emit the entire mask table and dynamically (i.e. at run -;; time) find the correct mask offset in the table. We do this use `lea` to find -;; the base address of the mask table and then complex addressing to offset to -;; the right mask: `base_address + amt << 4` +;; time) find the correct mask offset in the table. We use `lea` to find the +;; base address of the mask table and then complex addressing to offset to the +;; right mask: `base_address + amt << 4` +(decl ishl_i8x16_mask_table () SyntheticAmode) +(extern constructor ishl_i8x16_mask_table ishl_i8x16_mask_table) (rule (ishl_i8x16_mask (RegMemImm.Reg amt)) (let ((mask_table SyntheticAmode (ishl_i8x16_mask_table)) (base_mask_addr Reg (lea mask_table)) @@ -613,14 +617,6 @@ (rule (ishl_i8x16_mask (RegMemImm.Mem amt)) (ishl_i8x16_mask (RegMemImm.Reg (x64_load $I64 amt (ExtKind.None))))) -;; Get the address of the mask for a constant 8x16 shift amount. -(decl ishl_i8x16_mask_for_const (u32) SyntheticAmode) -(extern constructor ishl_i8x16_mask_for_const ishl_i8x16_mask_for_const) - -;; Get the address of the mask table for a dynamic 8x16 shift amount. -(decl ishl_i8x16_mask_table () SyntheticAmode) -(extern constructor ishl_i8x16_mask_table ishl_i8x16_mask_table) - ;; 16x8, 32x4, and 64x2 shifts can each use a single instruction. (rule (lower (has_type $I16X8 (ishl src amt))) (value_reg (psllw (put_in_reg src) @@ -671,6 +667,61 @@ (let ((amt_ Reg (lo_reg amt))) (shr_i128 (put_in_regs src) amt_))) +;; SSE. + +;; There are no 8x16 shifts in x64. Do the same 16x8-shift-and-mask thing we do +;; with 8x16 `ishl`. +(rule (lower (has_type $I8X16 (ushr src amt))) + (let ((src_ Reg (put_in_reg src)) + (amt_gpr RegMemImm (put_in_reg_mem_imm amt)) + (amt_xmm RegMemImm (reg_mem_imm_to_xmm amt_gpr)) + ;; Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be + ;; correct for half of the lanes; the others must be fixed up with + ;; the mask below. + (unmasked Reg (psrlw src_ amt_xmm)) + (mask_addr SyntheticAmode (ushr_i8x16_mask amt_gpr)) + (mask Reg (x64_load $I8X16 mask_addr (ExtKind.None)))) + (value_reg (sse_and $I8X16 unmasked (RegMem.Reg mask))))) + +;; Get the address of the mask to use when fixing up the lanes that weren't +;; correctly generated by the 16x8 shift. +(decl ushr_i8x16_mask (RegMemImm) SyntheticAmode) + +;; When the shift amount is known, we can statically (i.e. at compile time) +;; determine the mask to use and only emit that. +(decl ushr_i8x16_mask_for_const (u32) SyntheticAmode) +(extern constructor ushr_i8x16_mask_for_const ushr_i8x16_mask_for_const) +(rule (ushr_i8x16_mask (RegMemImm.Imm amt)) + (ushr_i8x16_mask_for_const amt)) + +;; Otherwise, we must emit the entire mask table and dynamically (i.e. at run +;; time) find the correct mask offset in the table. We use `lea` to find the +;; base address of the mask table and then complex addressing to offset to the +;; right mask: `base_address + amt << 4` +(decl ushr_i8x16_mask_table () SyntheticAmode) +(extern constructor ushr_i8x16_mask_table ushr_i8x16_mask_table) +(rule (ushr_i8x16_mask (RegMemImm.Reg amt)) + (let ((mask_table SyntheticAmode (ushr_i8x16_mask_table)) + (base_mask_addr Reg (lea mask_table)) + (mask_offset Reg (shl $I64 amt (Imm8Reg.Imm8 4)))) + (amode_to_synthetic_amode (amode_imm_reg_reg_shift 0 + base_mask_addr + mask_offset + 0)))) +(rule (ushr_i8x16_mask (RegMemImm.Mem amt)) + (ushr_i8x16_mask (RegMemImm.Reg (x64_load $I64 amt (ExtKind.None))))) + +;; 16x8, 32x4, and 64x2 shifts can each use a single instruction. +(rule (lower (has_type $I16X8 (ushr src amt))) + (value_reg (psrlw (put_in_reg src) + (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))) +(rule (lower (has_type $I32X4 (ushr src amt))) + (value_reg (psrld (put_in_reg src) + (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))) +(rule (lower (has_type $I64X2 (ushr src amt))) + (value_reg (psrlq (put_in_reg src) + (reg_mem_imm_to_xmm (put_in_reg_mem_imm amt))))) + ;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 0f150318fb64..606b208c17a8 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1539,10 +1539,11 @@ fn lower_insn_to_regs>( | Opcode::Bnot | Opcode::Bitselect | Opcode::Vselect + | Opcode::Ushr | Opcode::Sshr | Opcode::Ishl => implemented_in_isle(ctx), - Opcode::Ushr | Opcode::Rotl | Opcode::Rotr => { + Opcode::Rotl | Opcode::Rotr => { let dst_ty = ctx.output_ty(insn, 0); debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty); @@ -1558,11 +1559,7 @@ fn lower_insn_to_regs>( // This implementation uses the last two encoding methods. let (size, lhs) = match dst_ty { types::I8 | types::I16 => match op { - Opcode::Ushr => ( - OperandSize::Size32, - extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo32), - ), - Opcode::Rotl | Opcode::Rotr => ( + Opcode::Rotr => ( OperandSize::from_ty(dst_ty), put_input_in_reg(ctx, inputs[0]), ), @@ -1589,8 +1586,6 @@ fn lower_insn_to_regs>( let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let shift_kind = match op { - Opcode::Ushr => ShiftKind::ShiftRightLogical, - Opcode::Rotl => ShiftKind::RotateLeft, Opcode::Rotr => ShiftKind::RotateRight, _ => unreachable!(), }; @@ -1607,9 +1602,6 @@ fn lower_insn_to_regs>( let dst = get_output_reg(ctx, outputs[0]); match op { - Opcode::Ushr | Opcode::Rotl => { - implemented_in_isle(ctx); - } Opcode::Rotr => { // (mov tmp, src) // (ushr.i128 tmp, amt) @@ -1642,159 +1634,8 @@ fn lower_insn_to_regs>( } _ => unreachable!(), } - } else if dst_ty == types::I8X16 && op == Opcode::Ushr { - // Since the x86 instruction set does not have any 8x16 shift instructions (even in higher feature sets - // like AVX), we lower the `ishl.i8x16` and `ushr.i8x16` to a sequence of instructions. The basic idea, - // whether the `shift_by` amount is an immediate or not, is to use a 16x8 shift and then mask off the - // incorrect bits to 0s (see below for handling signs in `sshr.i8x16`). - let src = put_input_in_reg(ctx, inputs[0]); - let shift_by = input_to_reg_mem_imm(ctx, inputs[1]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - // If necessary, move the shift index into the lowest bits of a vector register. - let shift_by_moved = match &shift_by { - RegMemImm::Imm { .. } => shift_by.clone(), - RegMemImm::Reg { reg } => { - let tmp_shift_by = ctx.alloc_tmp(dst_ty).only_reg().unwrap(); - ctx.emit(Inst::gpr_to_xmm( - SseOpcode::Movd, - RegMem::reg(*reg), - OperandSize::Size32, - tmp_shift_by, - )); - RegMemImm::reg(tmp_shift_by.to_reg()) - } - RegMemImm::Mem { .. } => unimplemented!("load shift amount to XMM register"), - }; - - // Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be correct for half of the lanes; - // the others must be fixed up with the mask below. - let shift_opcode = match op { - Opcode::Ushr => SseOpcode::Psrlw, - _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), - }; - ctx.emit(Inst::gen_move(dst, src, dst_ty)); - ctx.emit(Inst::xmm_rmi_reg(shift_opcode, shift_by_moved, dst)); - - // Choose which mask to use to fixup the shifted lanes. Since we must use a 16x8 shift, we need to fix - // up the bits that migrate from one half of the lane to the other. Each 16-byte mask (which rustfmt - // forces to multiple lines) is indexed by the shift amount: e.g. if we shift right by 0 (no movement), - // we want to retain all the bits so we mask with `0xff`; if we shift right by 1, we want to retain all - // bits except the MSB so we mask with `0x7f`; etc. - const USHR_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - ]; - - let mask = match op { - Opcode::Ushr => &USHR_MASKS, - _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), - }; - - // Figure out the address of the shift mask. - let mask_address = match shift_by { - RegMemImm::Imm { simm32 } => { - // When the shift amount is known, we can statically (i.e. at compile time) determine the mask to - // use and only emit that. - debug_assert!(simm32 < 8); - let mask_offset = simm32 as usize * 16; - let mask_constant = ctx.use_constant(VCodeConstantData::WellKnown( - &mask[mask_offset..mask_offset + 16], - )); - SyntheticAmode::ConstantOffset(mask_constant) - } - RegMemImm::Reg { reg } => { - // Otherwise, we must emit the entire mask table and dynamically (i.e. at run time) find the correct - // mask offset in the table. We do this use LEA to find the base address of the mask table and then - // complex addressing to offset to the right mask: `base_address + shift_by * 4` - let base_mask_address = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let mask_offset = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let mask_constant = ctx.use_constant(VCodeConstantData::WellKnown(mask)); - ctx.emit(Inst::lea( - SyntheticAmode::ConstantOffset(mask_constant), - base_mask_address, - )); - ctx.emit(Inst::gen_move(mask_offset, reg, types::I64)); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(4), - mask_offset, - )); - Amode::imm_reg_reg_shift( - 0, - base_mask_address.to_reg(), - mask_offset.to_reg(), - 0, - ) - .into() - } - RegMemImm::Mem { addr: _ } => unimplemented!("load mask address"), - }; - - // Load the mask into a temporary register, `mask_value`. - let mask_value = ctx.alloc_tmp(dst_ty).only_reg().unwrap(); - ctx.emit(Inst::load(dst_ty, mask_address, mask_value, ExtKind::None)); - - // Remove the bits that would have disappeared in a true 8x16 shift. TODO in the future, - // this AND instruction could be coalesced with the load above. - let sse_op = match dst_ty { - types::F32X4 => SseOpcode::Andps, - types::F64X2 => SseOpcode::Andpd, - _ => SseOpcode::Pand, - }; - ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::from(mask_value), dst)); } else { - // For the remaining packed shifts not covered above, x86 has implementations that can either: - // - shift using an immediate - // - shift using a dynamic value given in the lower bits of another XMM register. - let src = put_input_in_reg(ctx, inputs[0]); - let shift_by = input_to_reg_mem_imm(ctx, inputs[1]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let sse_op = match dst_ty { - types::I16X8 => match op { - Opcode::Ushr => SseOpcode::Psrlw, - _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), - }, - types::I32X4 => match op { - Opcode::Ushr => SseOpcode::Psrld, - _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), - }, - types::I64X2 => match op { - Opcode::Ushr => SseOpcode::Psrlq, - _ => unimplemented!("{} is not implemented for type {}", op, dst_ty), - }, - _ => unreachable!(), - }; - - // If necessary, move the shift index into the lowest bits of a vector register. - let shift_by = match shift_by { - RegMemImm::Imm { .. } => shift_by, - RegMemImm::Reg { reg } => { - let tmp_shift_by = ctx.alloc_tmp(dst_ty).only_reg().unwrap(); - ctx.emit(Inst::gpr_to_xmm( - SseOpcode::Movd, - RegMem::reg(reg), - OperandSize::Size32, - tmp_shift_by, - )); - RegMemImm::reg(tmp_shift_by.to_reg()) - } - RegMemImm::Mem { .. } => unimplemented!("load shift amount to XMM register"), - }; - - // Move the `src` to the same register as `dst`. - ctx.emit(Inst::gen_move(dst, src, dst_ty)); - - ctx.emit(Inst::xmm_rmi_reg(sse_op, shift_by, dst)); + implemented_in_isle(ctx); } } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 17b64fcb72c8..7c5d1b129b86 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -270,7 +270,7 @@ where debug_assert!(amt < 8); let mask_offset = amt as usize * 16; let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown( - &I8X16_SHL_MASKS[mask_offset..mask_offset + 16], + &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16], )); SyntheticAmode::ConstantOffset(mask_constant) } @@ -278,7 +278,25 @@ where fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode { let mask_table = self .lower_ctx - .use_constant(VCodeConstantData::WellKnown(&I8X16_SHL_MASKS)); + .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS)); + SyntheticAmode::ConstantOffset(mask_table) + } + + fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode { + // When the shift amount is known, we can statically (i.e. at compile + // time) determine the mask to use and only emit that. + debug_assert!(amt < 8); + let mask_offset = amt as usize * 16; + let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown( + &I8X16_USHR_MASKS[mask_offset..mask_offset + 16], + )); + SyntheticAmode::ConstantOffset(mask_constant) + } + + fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode { + let mask_table = self + .lower_ctx + .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS)); SyntheticAmode::ConstantOffset(mask_table) } } @@ -289,8 +307,9 @@ where // right by 0 (no movement), we want to retain all the bits so we mask with // `0xff`; if we shift right by 1, we want to retain all bits except the MSB so // we mask with `0x7f`; etc. + #[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row. -const I8X16_SHL_MASKS: [u8; 128] = [ +const I8X16_ISHL_MASKS: [u8; 128] = [ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, @@ -301,6 +320,18 @@ const I8X16_SHL_MASKS: [u8; 128] = [ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ]; +#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row. +const I8X16_USHR_MASKS: [u8; 128] = [ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, +]; + #[inline] fn to_simm32(constant: i64) -> Option { if constant == ((constant << 32) >> 32) { diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index 1cfc6d633d54..2ed8c1a2e325 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle f176ef3bba99365 src/prelude.isle 7b911d3b894ae17 -src/isa/x64/inst.isle dbfa857f7f2c5d9f -src/isa/x64/lower.isle 5a737854091e1189 +src/isa/x64/inst.isle 41304d8ef6f7d816 +src/isa/x64/lower.isle 4689585f55f41438 diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index 011a6b271faa..354ca5719279 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -84,6 +84,8 @@ pub trait Context { fn nonzero_u64_fits_in_u32(&mut self, arg0: u64) -> Option; fn ishl_i8x16_mask_for_const(&mut self, arg0: u32) -> SyntheticAmode; fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode; + fn ushr_i8x16_mask_for_const(&mut self, arg0: u32) -> SyntheticAmode; + fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode; fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8; } @@ -2135,11 +2137,21 @@ pub fn constructor_psllq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - return Some(expr1_0); } +// Generated as internal constructor for term psrlw. +pub fn constructor_psrlw(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1310. + let expr0_0 = SseOpcode::Psrlw; + let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; + return Some(expr1_0); +} + // Generated as internal constructor for term psrld. pub fn constructor_psrld(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1310. + // Rule at src/isa/x64/inst.isle line 1315. let expr0_0 = SseOpcode::Psrld; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2149,7 +2161,7 @@ pub fn constructor_psrld(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psrlq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1315. + // Rule at src/isa/x64/inst.isle line 1320. let expr0_0 = SseOpcode::Psrlq; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2159,7 +2171,7 @@ pub fn constructor_psrlq(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psraw(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1320. + // Rule at src/isa/x64/inst.isle line 1325. let expr0_0 = SseOpcode::Psraw; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2169,7 +2181,7 @@ pub fn constructor_psraw(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) - pub fn constructor_psrad(ctx: &mut C, arg0: Reg, arg1: &RegMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1325. + // Rule at src/isa/x64/inst.isle line 1330. let expr0_0 = SseOpcode::Psrad; let expr1_0 = constructor_xmm_rmi_reg(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2187,7 +2199,7 @@ pub fn constructor_mul_hi( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1332. + // Rule at src/isa/x64/inst.isle line 1337. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::temp_writable_reg(ctx, pattern0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -2216,7 +2228,7 @@ pub fn constructor_mulhi_u( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1348. + // Rule at src/isa/x64/inst.isle line 1353. let expr0_0: bool = false; let expr1_0 = constructor_mul_hi(ctx, pattern0_0, expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2232,7 +2244,7 @@ pub fn constructor_cmpps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1353. + // Rule at src/isa/x64/inst.isle line 1358. let expr0_0 = SseOpcode::Cmpps; let expr1_0 = C::encode_fcmp_imm(ctx, pattern2_0); let expr2_0 = OperandSize::Size32; @@ -2251,7 +2263,7 @@ pub fn constructor_cmppd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1366. + // Rule at src/isa/x64/inst.isle line 1371. let expr0_0 = SseOpcode::Cmppd; let expr1_0 = C::encode_fcmp_imm(ctx, pattern2_0); let expr2_0 = OperandSize::Size32; @@ -2272,7 +2284,7 @@ pub fn constructor_gpr_to_xmm( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1375. + // Rule at src/isa/x64/inst.isle line 1380. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = MInst::GprToXmm { op: pattern1_0.clone(), @@ -2295,7 +2307,7 @@ pub fn constructor_pinsrb( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1382. + // Rule at src/isa/x64/inst.isle line 1387. let expr0_0 = SseOpcode::Pinsrb; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2313,7 +2325,7 @@ pub fn constructor_pinsrw( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1387. + // Rule at src/isa/x64/inst.isle line 1392. let expr0_0 = SseOpcode::Pinsrw; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2333,7 +2345,7 @@ pub fn constructor_pinsrd( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1392. + // Rule at src/isa/x64/inst.isle line 1397. let expr0_0 = SseOpcode::Pinsrd; let expr1_0 = constructor_xmm_rm_r_imm( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2351,7 +2363,7 @@ pub fn constructor_insertps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1397. + // Rule at src/isa/x64/inst.isle line 1402. let expr0_0 = SseOpcode::Insertps; let expr1_0 = OperandSize::Size32; let expr2_0 = @@ -2364,7 +2376,7 @@ pub fn constructor_pextrd(ctx: &mut C, arg0: Type, arg1: Reg, arg2: let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1402. + // Rule at src/isa/x64/inst.isle line 1407. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); let expr2_0 = SseOpcode::Pextrd; @@ -2387,7 +2399,7 @@ pub fn constructor_pextrd(ctx: &mut C, arg0: Type, arg1: Reg, arg2: pub fn constructor_not(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1415. + // Rule at src/isa/x64/inst.isle line 1420. let expr0_0 = C::temp_writable_reg(ctx, pattern0_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Not { @@ -2403,7 +2415,7 @@ pub fn constructor_not(ctx: &mut C, arg0: Type, arg1: Reg) -> Option // Generated as internal constructor for term lea. pub fn constructor_lea(ctx: &mut C, arg0: &SyntheticAmode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1422. + // Rule at src/isa/x64/inst.isle line 1427. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::LoadEffectiveAddress { @@ -2455,7 +2467,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1188. + // Rule at src/isa/x64/lower.isle line 1239. let expr0_0 = constructor_i128_not(ctx, pattern5_1)?; return Some(expr0_0); } @@ -2644,7 +2656,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 876. + // Rule at src/isa/x64/lower.isle line 927. let expr0_0 = C::put_in_regs(ctx, pattern7_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -2735,7 +2747,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 809. + // Rule at src/isa/x64/lower.isle line 860. let expr0_0 = C::put_in_regs(ctx, pattern7_0); let expr1_0 = constructor_lo_reg(ctx, pattern7_1)?; let expr2_0 = constructor_shl_i128(ctx, expr0_0, expr1_0)?; @@ -2761,7 +2773,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 668. + // Rule at src/isa/x64/lower.isle line 664. let expr0_0 = constructor_lo_reg(ctx, pattern7_1)?; let expr1_0 = C::put_in_regs(ctx, pattern7_0); let expr2_0 = constructor_shr_i128(ctx, expr1_0, expr0_0)?; @@ -2770,7 +2782,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 710. + // Rule at src/isa/x64/lower.isle line 761. let expr0_0 = constructor_lo_reg(ctx, pattern7_1)?; let expr1_0 = C::put_in_regs(ctx, pattern7_0); let expr2_0 = constructor_sar_i128(ctx, expr1_0, expr0_0)?; @@ -2784,7 +2796,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1185. + // Rule at src/isa/x64/lower.isle line 1236. let expr0_0 = constructor_i128_not(ctx, pattern5_1)?; return Some(expr0_0); } @@ -2829,7 +2841,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1289. + // Rule at src/isa/x64/lower.isle line 1340. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsb(ctx, expr0_0, &expr1_0)?; @@ -2839,7 +2851,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1311. + // Rule at src/isa/x64/lower.isle line 1362. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminub(ctx, expr0_0, &expr1_0)?; @@ -2849,7 +2861,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1278. + // Rule at src/isa/x64/lower.isle line 1329. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsb(ctx, expr0_0, &expr1_0)?; @@ -2859,7 +2871,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1300. + // Rule at src/isa/x64/lower.isle line 1351. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxub(ctx, expr0_0, &expr1_0)?; @@ -2884,11 +2896,29 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 674. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; + let expr3_0 = constructor_psrlw(ctx, expr0_0, &expr2_0)?; + let expr4_0 = constructor_ushr_i8x16_mask(ctx, &expr1_0)?; + let expr5_0: Type = I8X16; + let expr6_0 = ExtKind::None; + let expr7_0 = constructor_x64_load(ctx, expr5_0, &expr4_0, &expr6_0)?; + let expr8_0: Type = I8X16; + let expr9_0 = RegMem::Reg { reg: expr7_0 }; + let expr10_0 = constructor_sse_and(ctx, expr8_0, expr3_0, &expr9_0)?; + let expr11_0 = C::value_reg(ctx, expr10_0); + return Some(expr11_0); + } &Opcode::Sshr => { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); let pattern8_0 = C::value_type(ctx, pattern7_1); - // Rule at src/isa/x64/lower.isle line 731. + // Rule at src/isa/x64/lower.isle line 782. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = RegMem::Reg { reg: expr0_0 }; let expr2_0 = constructor_punpcklbw(ctx, expr0_0, &expr1_0)?; @@ -2912,7 +2942,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1131. + // Rule at src/isa/x64/lower.isle line 1182. let expr0_0 = C::put_in_reg_mem(ctx, pattern5_1); let expr1_0 = constructor_pabsb(ctx, &expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -2933,7 +2963,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1292. + // Rule at src/isa/x64/lower.isle line 1343. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsw(ctx, expr0_0, &expr1_0)?; @@ -2943,7 +2973,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1314. + // Rule at src/isa/x64/lower.isle line 1365. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminuw(ctx, expr0_0, &expr1_0)?; @@ -2953,7 +2983,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1281. + // Rule at src/isa/x64/lower.isle line 1332. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsw(ctx, expr0_0, &expr1_0)?; @@ -2963,7 +2993,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1303. + // Rule at src/isa/x64/lower.isle line 1354. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxuw(ctx, expr0_0, &expr1_0)?; @@ -2973,7 +3003,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 625. + // Rule at src/isa/x64/lower.isle line 621. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; @@ -2981,10 +3011,21 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 715. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; + let expr3_0 = constructor_psrlw(ctx, expr0_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } &Opcode::Sshr => { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 753. + // Rule at src/isa/x64/lower.isle line 804. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; @@ -3000,7 +3041,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1134. + // Rule at src/isa/x64/lower.isle line 1185. let expr0_0 = C::put_in_reg_mem(ctx, pattern5_1); let expr1_0 = constructor_pabsw(ctx, &expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -3021,7 +3062,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1295. + // Rule at src/isa/x64/lower.isle line 1346. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminsd(ctx, expr0_0, &expr1_0)?; @@ -3031,7 +3072,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1317. + // Rule at src/isa/x64/lower.isle line 1368. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pminud(ctx, expr0_0, &expr1_0)?; @@ -3041,7 +3082,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1284. + // Rule at src/isa/x64/lower.isle line 1335. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxsd(ctx, expr0_0, &expr1_0)?; @@ -3051,7 +3092,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1306. + // Rule at src/isa/x64/lower.isle line 1357. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = constructor_pmaxud(ctx, expr0_0, &expr1_0)?; @@ -3061,7 +3102,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 628. + // Rule at src/isa/x64/lower.isle line 624. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; @@ -3069,10 +3110,21 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 718. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; + let expr3_0 = constructor_psrld(ctx, expr0_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } &Opcode::Sshr => { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 756. + // Rule at src/isa/x64/lower.isle line 807. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; @@ -3088,7 +3140,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1137. + // Rule at src/isa/x64/lower.isle line 1188. let expr0_0 = C::put_in_reg_mem(ctx, pattern5_1); let expr1_0 = constructor_pabsd(ctx, &expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -3109,7 +3161,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 631. + // Rule at src/isa/x64/lower.isle line 627. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; @@ -3117,10 +3169,21 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 721. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg_mem_imm(ctx, pattern7_1); + let expr2_0 = constructor_reg_mem_imm_to_xmm(ctx, &expr1_0)?; + let expr3_0 = constructor_psrlq(ctx, expr0_0, &expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } &Opcode::Sshr => { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 768. + // Rule at src/isa/x64/lower.isle line 819. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0: Type = I64; let expr2_0: u8 = 0; @@ -3149,7 +3212,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Iabs = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1151. + // Rule at src/isa/x64/lower.isle line 1202. let expr0_0 = C::put_in_reg(ctx, pattern5_1); let expr1_0: Type = I64X2; let expr2_0: u64 = 0; @@ -3173,7 +3236,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::BandNot = &pattern4_0 { let (pattern6_0, pattern6_1) = C::unpack_value_array_2(ctx, &pattern4_1); - // Rule at src/isa/x64/lower.isle line 1124. + // Rule at src/isa/x64/lower.isle line 1175. let expr0_0 = C::put_in_reg(ctx, pattern6_1); let expr1_0 = C::put_in_reg_mem(ctx, pattern6_0); let expr2_0 = constructor_sse_and_not(ctx, pattern2_0, expr0_0, &expr1_0)?; @@ -3252,7 +3315,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern9_0, pattern9_1) = C::unpack_value_array_2(ctx, &pattern7_1); - // Rule at src/isa/x64/lower.isle line 819. + // Rule at src/isa/x64/lower.isle line 870. let expr0_0 = C::put_in_reg(ctx, pattern9_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern9_1); let expr2_0 = constructor_pavgb(ctx, expr0_0, &expr1_0)?; @@ -3380,7 +3443,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern9_0, pattern9_1) = C::unpack_value_array_2(ctx, &pattern7_1); - // Rule at src/isa/x64/lower.isle line 823. + // Rule at src/isa/x64/lower.isle line 874. let expr0_0 = C::put_in_reg(ctx, pattern9_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern9_1); let expr2_0 = constructor_pavgw(ctx, expr0_0, &expr1_0)?; @@ -3493,7 +3556,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1, pattern7_2) = C::unpack_value_array_3(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1198. + // Rule at src/isa/x64/lower.isle line 1249. let expr0_0 = C::put_in_reg(ctx, pattern7_0); let expr1_0 = C::put_in_reg(ctx, pattern7_1); let expr2_0 = RegMem::Reg { reg: expr0_0 }; @@ -4440,7 +4503,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1, pattern7_2) = C::unpack_value_array_3(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1212. + // Rule at src/isa/x64/lower.isle line 1263. let expr0_0 = C::put_in_reg_mem(ctx, pattern7_0); let expr1_0 = C::put_in_reg_mem(ctx, pattern7_1); let expr2_0 = C::put_in_reg(ctx, pattern7_2); @@ -4458,7 +4521,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1193. + // Rule at src/isa/x64/lower.isle line 1244. let expr0_0 = C::put_in_reg(ctx, pattern5_1); let expr1_0 = constructor_vector_all_ones(ctx, pattern2_0)?; let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -4585,7 +4648,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 639. + // Rule at src/isa/x64/lower.isle line 635. let expr0_0 = ExtendKind::Zero; let expr1_0 = constructor_extend_to_reg(ctx, pattern7_0, pattern3_0, &expr0_0)?; @@ -4838,7 +4901,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 678. + // Rule at src/isa/x64/lower.isle line 729. let expr0_0 = ExtendKind::Sign; let expr1_0 = constructor_extend_to_reg(ctx, pattern7_0, pattern3_0, &expr0_0)?; @@ -4855,7 +4918,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { if let &Opcode::Bnot = &pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1172. + // Rule at src/isa/x64/lower.isle line 1223. let expr0_0 = C::put_in_reg(ctx, pattern5_1); let expr1_0 = constructor_not(ctx, pattern3_0, expr0_0)?; let expr2_0 = C::value_reg(ctx, expr1_0); @@ -4899,7 +4962,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option(ctx: &mut C, arg0: Inst) -> Option( let pattern0_0 = arg0; match pattern0_0 { &RegMemImm::Imm { simm32: pattern1_0 } => { - // Rule at src/isa/x64/lower.isle line 598. + // Rule at src/isa/x64/lower.isle line 600. let expr0_0 = C::ishl_i8x16_mask_for_const(ctx, pattern1_0); return Some(expr0_0); } &RegMemImm::Reg { reg: pattern1_0 } => { - // Rule at src/isa/x64/lower.isle line 605. + // Rule at src/isa/x64/lower.isle line 609. let expr0_0 = C::ishl_i8x16_mask_table(ctx); let expr1_0 = constructor_lea(ctx, &expr0_0)?; let expr2_0: Type = I64; @@ -5144,7 +5207,7 @@ pub fn constructor_ishl_i8x16_mask( &RegMemImm::Mem { addr: ref pattern1_0, } => { - // Rule at src/isa/x64/lower.isle line 613. + // Rule at src/isa/x64/lower.isle line 617. let expr0_0: Type = I64; let expr1_0 = ExtKind::None; let expr2_0 = constructor_x64_load(ctx, expr0_0, &pattern1_0, &expr1_0)?; @@ -5165,7 +5228,7 @@ pub fn constructor_shr_i128( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/lower.isle line 646. + // Rule at src/isa/x64/lower.isle line 642. let expr0_0: usize = 0; let expr1_0 = C::value_regs_get(ctx, pattern0_0, expr0_0); let expr2_0: usize = 1; @@ -5219,6 +5282,48 @@ pub fn constructor_shr_i128( return Some(expr49_0); } +// Generated as internal constructor for term ushr_i8x16_mask. +pub fn constructor_ushr_i8x16_mask( + ctx: &mut C, + arg0: &RegMemImm, +) -> Option { + let pattern0_0 = arg0; + match pattern0_0 { + &RegMemImm::Imm { simm32: pattern1_0 } => { + // Rule at src/isa/x64/lower.isle line 694. + let expr0_0 = C::ushr_i8x16_mask_for_const(ctx, pattern1_0); + return Some(expr0_0); + } + &RegMemImm::Reg { reg: pattern1_0 } => { + // Rule at src/isa/x64/lower.isle line 703. + let expr0_0 = C::ushr_i8x16_mask_table(ctx); + let expr1_0 = constructor_lea(ctx, &expr0_0)?; + let expr2_0: Type = I64; + let expr3_0: u8 = 4; + let expr4_0 = Imm8Reg::Imm8 { imm: expr3_0 }; + let expr5_0 = constructor_shl(ctx, expr2_0, pattern1_0, &expr4_0)?; + let expr6_0: u32 = 0; + let expr7_0: u8 = 0; + let expr8_0 = C::amode_imm_reg_reg_shift(ctx, expr6_0, expr1_0, expr5_0, expr7_0); + let expr9_0 = C::amode_to_synthetic_amode(ctx, &expr8_0); + return Some(expr9_0); + } + &RegMemImm::Mem { + addr: ref pattern1_0, + } => { + // Rule at src/isa/x64/lower.isle line 711. + let expr0_0: Type = I64; + let expr1_0 = ExtKind::None; + let expr2_0 = constructor_x64_load(ctx, expr0_0, &pattern1_0, &expr1_0)?; + let expr3_0 = RegMemImm::Reg { reg: expr2_0 }; + let expr4_0 = constructor_ushr_i8x16_mask(ctx, &expr3_0)?; + return Some(expr4_0); + } + _ => {} + } + return None; +} + // Generated as internal constructor for term sar_i128. pub fn constructor_sar_i128( ctx: &mut C, @@ -5227,7 +5332,7 @@ pub fn constructor_sar_i128( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/lower.isle line 685. + // Rule at src/isa/x64/lower.isle line 736. let expr0_0: usize = 0; let expr1_0 = C::value_regs_get(ctx, pattern0_0, expr0_0); let expr2_0: usize = 1; @@ -5292,14 +5397,14 @@ pub fn constructor_sshr_i8x16_bigger_shift( let pattern1_0 = arg1; match pattern1_0 { &RegMemImm::Imm { simm32: pattern2_0 } => { - // Rule at src/isa/x64/lower.isle line 744. + // Rule at src/isa/x64/lower.isle line 795. let expr0_0: u32 = 8; let expr1_0 = C::u32_add(ctx, pattern2_0, expr0_0); let expr2_0 = RegMemImm::Imm { simm32: expr1_0 }; return Some(expr2_0); } &RegMemImm::Reg { reg: pattern2_0 } => { - // Rule at src/isa/x64/lower.isle line 746. + // Rule at src/isa/x64/lower.isle line 797. let expr0_0: u32 = 8; let expr1_0 = RegMemImm::Imm { simm32: expr0_0 }; let expr2_0 = constructor_add(ctx, pattern0_0, pattern2_0, &expr1_0)?; @@ -5310,7 +5415,7 @@ pub fn constructor_sshr_i8x16_bigger_shift( &RegMemImm::Mem { addr: ref pattern2_0, } => { - // Rule at src/isa/x64/lower.isle line 748. + // Rule at src/isa/x64/lower.isle line 799. let expr0_0: u64 = 8; let expr1_0 = constructor_imm(ctx, pattern0_0, expr0_0)?; let expr2_0 = constructor_add(ctx, pattern0_0, expr1_0, pattern1_0)?; @@ -5334,21 +5439,21 @@ pub fn constructor_sse_and_not( if pattern0_0 == F32X4 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/lower.isle line 1113. + // Rule at src/isa/x64/lower.isle line 1164. let expr0_0 = constructor_andnps(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } if pattern0_0 == F64X2 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/lower.isle line 1114. + // Rule at src/isa/x64/lower.isle line 1165. let expr0_0 = constructor_andnpd(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/lower.isle line 1115. + // Rule at src/isa/x64/lower.isle line 1166. let expr0_0 = constructor_pandn(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); } @@ -5358,7 +5463,7 @@ pub fn constructor_sse_and_not( // Generated as internal constructor for term i128_not. pub fn constructor_i128_not(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/lower.isle line 1178. + // Rule at src/isa/x64/lower.isle line 1229. let expr0_0 = C::put_in_regs(ctx, pattern0_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -5385,7 +5490,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1232. + // Rule at src/isa/x64/lower.isle line 1283. let expr0_0 = constructor_pinsrb(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -5393,7 +5498,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1235. + // Rule at src/isa/x64/lower.isle line 1286. let expr0_0 = constructor_pinsrw(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -5401,7 +5506,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1238. + // Rule at src/isa/x64/lower.isle line 1289. let expr0_0 = OperandSize::Size32; let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; return Some(expr1_0); @@ -5410,7 +5515,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1241. + // Rule at src/isa/x64/lower.isle line 1292. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; return Some(expr1_0); @@ -5419,7 +5524,7 @@ pub fn constructor_vec_insert_lane( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/lower.isle line 1244. + // Rule at src/isa/x64/lower.isle line 1295. let expr0_0 = C::sse_insertps_lane_imm(ctx, pattern4_0); let expr1_0 = constructor_insertps(ctx, pattern2_0, pattern3_0, expr0_0)?; return Some(expr1_0); @@ -5430,7 +5535,7 @@ pub fn constructor_vec_insert_lane( if let &RegMem::Reg { reg: pattern4_0 } = pattern3_0 { let pattern5_0 = arg3; if pattern5_0 == 0 { - // Rule at src/isa/x64/lower.isle line 1265. + // Rule at src/isa/x64/lower.isle line 1316. let expr0_0 = RegMem::Reg { reg: pattern4_0 }; let expr1_0 = constructor_movsd(ctx, pattern2_0, &expr0_0)?; return Some(expr1_0); @@ -5438,7 +5543,7 @@ pub fn constructor_vec_insert_lane( } let pattern4_0 = arg3; if pattern4_0 == 0 { - // Rule at src/isa/x64/lower.isle line 1266. + // Rule at src/isa/x64/lower.isle line 1317. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern3_0)?; let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -5446,7 +5551,7 @@ pub fn constructor_vec_insert_lane( return Some(expr3_0); } if pattern4_0 == 1 { - // Rule at src/isa/x64/lower.isle line 1274. + // Rule at src/isa/x64/lower.isle line 1325. let expr0_0 = constructor_movlhps(ctx, pattern2_0, pattern3_0)?; return Some(expr0_0); }