Skip to content

Commit

Permalink
Add support for Saturating Rounding Q-format Multiplication for x64
Browse files Browse the repository at this point in the history
  • Loading branch information
jlb6740 committed Jul 21, 2021
1 parent ebbe399 commit 29ae1d6
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 5 deletions.
2 changes: 1 addition & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {

match (testsuite, testname) {
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
("simd", "simd_i16x8_extmul_i8x16") => return true,
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
("simd", "simd_int_to_int_extend") => return true,
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,7 @@ pub enum SseOpcode {
Pmuldq,
Pmulhw,
Pmulhuw,
Pmulhrsw,
Pmulld,
Pmullw,
Pmuludq,
Expand Down Expand Up @@ -785,6 +786,7 @@ impl SseOpcode {
| SseOpcode::Pabsw
| SseOpcode::Pabsd
| SseOpcode::Palignr
| SseOpcode::Pmulhrsw
| SseOpcode::Pshufb => SSSE3,

SseOpcode::Blendvpd
Expand Down Expand Up @@ -966,6 +968,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pmuldq => "pmuldq",
SseOpcode::Pmulhw => "pmulhw",
SseOpcode::Pmulhuw => "pmulhuw",
SseOpcode::Pmulhrsw => "pmulhrsw",
SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq",
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1511,6 +1511,7 @@ pub(crate) fn emit(
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
SseOpcode::Pmuldq => (LegacyPrefixes::_66, 0x0F3828, 3),
SseOpcode::Pmulhw => (LegacyPrefixes::_66, 0x0FE5, 2),
SseOpcode::Pmulhrsw => (LegacyPrefixes::_66, 0x0F380B, 3),
SseOpcode::Pmulhuw => (LegacyPrefixes::_66, 0x0FE4, 2),
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
Expand Down Expand Up @@ -1755,7 +1756,6 @@ pub(crate) fn emit(
let (prefix, opcode) = match op {
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F),
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
Expand Down
42 changes: 39 additions & 3 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6405,9 +6405,45 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
},

Opcode::SqmulRoundSat => {
// Lane-wise saturating rounding multiplication in Q15 format
// Optimal lowering taken from the instruction proposal: https://github.com/WebAssembly/simd/pull/365
// y = i16x8.q15mulr_sat_s(a, b) is lowered to:
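//   MOVDQA   xmm_y, xmm_a
//   MOVDQA   xmm_tmp, wasm_i16x8_splat(0x8000)
//   PMULHRSW xmm_y, xmm_b
//   PCMPEQW  xmm_tmp, xmm_y
//   PXOR     xmm_y, xmm_tmp
// PMULHRSW only produces 0x8000 when both lanes are 0x8000 (-1.0 * -1.0 in Q15), which
// must saturate to 0x7FFF. PCMPEQW sets those lanes of xmm_tmp to all ones and PXOR then
// flips 0x8000 to 0x7FFF, leaving every other lane unchanged.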
let input_ty = ctx.input_ty(insn, 0);
let src1 = put_input_in_reg(ctx, inputs[0]);
let src2 = put_input_in_reg(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

ctx.emit(Inst::gen_move(dst, src1, input_ty));
static SAT_MASK: [u8; 16] = [
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
0x00, 0x80,
];
let mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&SAT_MASK));
let mask = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mask_const, mask, types::I16X8));

ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhrsw, RegMem::reg(src2), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pcmpeqw,
RegMem::reg(dst.to_reg()),
mask,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::reg(mask.to_reg()),
dst,
));
}

// Unimplemented opcodes below. These are not currently used by Wasm
// lowering or other known embeddings, but should be either supported or
// removed eventually.
// removed eventually.
Opcode::Uload8x8Complex
| Opcode::Sload8x8Complex
| Opcode::Uload16x4Complex
Expand Down Expand Up @@ -6435,8 +6471,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
unimplemented!("Vector split/concat ops not implemented.");
}

Opcode::SqmulRoundSat | Opcode::Uunarrow => {
unimplemented!("unimplemented lowering for opcode {:?}", op)
Opcode::Uunarrow => {
unimplemented!("unimplemented lowering for opcode {:?}", op);
}

// Opcodes that should be removed by legalization. These should
Expand Down

0 comments on commit 29ae1d6

Please sign in to comment.