From 1dbb747d598280d57ee9f7f33793960e2e90535a Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 17 Dec 2021 20:39:53 +0100 Subject: [PATCH 1/2] Fix popcnt for small integers --- cranelift/codegen/src/isa/x64/lower.rs | 27 +++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f75a9511bd24..d0d0debfdf83 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2236,14 +2236,25 @@ fn lower_insn_to_regs>( Opcode::Popcnt => { let ty_tmp = ty.unwrap(); if !ty_tmp.is_vector() { - let (ext_spec, ty) = match ctx.input_ty(insn, 0) { - types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32), - a if a == types::I32 || a == types::I64 || a == types::I128 => (None, a), - _ => unreachable!(), - }; + let ty = ctx.input_ty(insn, 0); if isa_flags.use_popcnt() { match ty { + types::I8 | types::I16 => { + let src = RegMem::reg(extend_input_to_reg( + ctx, + inputs[0], + ExtSpec::ZeroExtendTo32, + )); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::unary_rm_r( + OperandSize::from_ty(types::I32), + UnaryRmROpcode::Popcnt, + src, + dst, + )); + return Ok(()); + } types::I32 | types::I64 => { let src = input_to_reg_mem(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); @@ -2299,6 +2310,12 @@ fn lower_insn_to_regs>( } } + let (ext_spec, ty) = match ty { + types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32), + a if a == types::I32 || a == types::I64 || a == types::I128 => (None, a), + _ => unreachable!(), + }; + let (srcs, ty): (SmallVec<[RegMem; 2]>, Type) = if let Some(ext_spec) = ext_spec { ( smallvec![RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))], From 32c3afe4b3eef0ea59c7e052b0ac00efeb123b10 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 17 Dec 2021 20:58:32 +0100 Subject: [PATCH 2/2] Add regression runtests --- .../filetests/runtests/popcnt-aarch64.clif | 50 --------- .../filetests/filetests/runtests/popcnt.clif | 100 ++++++++++++++++++ 2 files changed, 100 insertions(+), 50 deletions(-) delete mode 100644 cranelift/filetests/filetests/runtests/popcnt-aarch64.clif create mode 100644 cranelift/filetests/filetests/runtests/popcnt.clif diff --git a/cranelift/filetests/filetests/runtests/popcnt-aarch64.clif b/cranelift/filetests/filetests/runtests/popcnt-aarch64.clif deleted file mode 100644 index 99c2b2eca849..000000000000 --- a/cranelift/filetests/filetests/runtests/popcnt-aarch64.clif +++ /dev/null @@ -1,50 +0,0 @@ -test interpret -test run -target aarch64 - -function %popcnt_i8(i8) -> i8 { -block0(v0: i8): - v1 = popcnt v0 - return v1 -} -; run: %popcnt_i8(1) == 1 -; run: %popcnt_i8(0x40) == 1 -; run: %popcnt_i8(-1) == 8 -; run: %popcnt_i8(0) == 0 - -function %popcnt_i16(i16) -> i16 { -block0(v0: i16): - v1 = popcnt v0 - return v1 -} -; run: %popcnt_i16(1) == 1 -; run: %popcnt_i16(0x4000) == 1 -; run: %popcnt_i16(-1) == 16 -; run: %popcnt_i16(0) == 0 - -function %popcnt_i32(i32) -> i32 { -block0(v0: i32): - v1 = popcnt v0 - return v1 -} -; run: %popcnt_i32(1) == 1 -; run: %popcnt_i32(0x40000000) == 1 -; run: %popcnt_i32(-1) == 32 -; run: %popcnt_i32(0) == 0 - -function %popcnt_i64(i64) -> i64 { -block0(v0: i64): - v1 = popcnt v0 - return v1 -} -; run: %popcnt_i64(1) == 1 -; run: %popcnt_i64(0x4000000000000000) == 1 -; run: %popcnt_i64(-1) == 64 -; run: %popcnt_i64(0) == 0 - -function %popcnt_i8x16(i8x16) -> i8x16 { -block0(v0: i8x16): - v1 = popcnt v0 - return v1 -} -; run: %popcnt_i8x16([1 1 1 1 0x40 0x40 0x40 0x40 0xff 0xff 0xff 0xff 0 0 0 0]) == [1 1 1 1 1 1 1 1 8 8 8 8 0 0 0 0] diff --git a/cranelift/filetests/filetests/runtests/popcnt.clif b/cranelift/filetests/filetests/runtests/popcnt.clif new file mode 100644 index 000000000000..4f6f7aa9c435 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/popcnt.clif @@ -0,0 +1,100 @@ +test interpret +test run +target aarch64 +target x86_64 +target x86_64 has_popcnt=1 + +function %popcnt_i8(i8) -> i8 { +block0(v0: i8): + v1 = popcnt v0 + return v1 +} +; run: %popcnt_i8(1) == 1 +; run: %popcnt_i8(0x40) == 1 +; run: %popcnt_i8(-1) == 8 +; run: %popcnt_i8(0) == 0 + +; Regression test for issue #3615 +function %inv_popcnt_i8(i8) -> i8 { +block0(v0: i8): + v1 = bnot v0 + v2 = popcnt v1 + return v2 +} +; run: %inv_popcnt_i8(1) == 7 +; run: %inv_popcnt_i8(0x40) == 7 +; run: %inv_popcnt_i8(-1) == 0 +; run: %inv_popcnt_i8(0) == 8 + +function %popcnt_i16(i16) -> i16 { +block0(v0: i16): + v1 = popcnt v0 + return v1 +} +; run: %popcnt_i16(1) == 1 +; run: %popcnt_i16(0x4000) == 1 +; run: %popcnt_i16(-1) == 16 +; run: %popcnt_i16(0) == 0 + +; Regression test for issue #3615 +function %inv_popcnt_i16(i16) -> i16 { +block0(v0: i16): + v1 = bnot v0 + v2 = popcnt v1 + return v2 +} +; run: %inv_popcnt_i16(1) == 15 +; run: %inv_popcnt_i16(0x4000) == 15 +; run: %inv_popcnt_i16(-1) == 0 +; run: %inv_popcnt_i16(0) == 16 + +function %popcnt_i32(i32) -> i32 { +block0(v0: i32): + v1 = popcnt v0 + return v1 +} +; run: %popcnt_i32(1) == 1 +; run: %popcnt_i32(0x40000000) == 1 +; run: %popcnt_i32(-1) == 32 +; run: %popcnt_i32(0) == 0 + +; Regression test for issue #3615 +function %inv_popcnt_i32(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = popcnt v1 + return v2 +} +; run: %inv_popcnt_i32(1) == 31 +; run: %inv_popcnt_i32(0x40000000) == 31 +; run: %inv_popcnt_i32(-1) == 0 +; run: %inv_popcnt_i32(0) == 32 + +function %popcnt_i64(i64) -> i64 { +block0(v0: i64): + v1 = popcnt v0 + return v1 +} +; run: %popcnt_i64(1) == 1 +; run: %popcnt_i64(0x4000000000000000) == 1 +; run: %popcnt_i64(-1) == 64 +; run: %popcnt_i64(0) == 0 + +; Regression test for issue #3615 +function %inv_popcnt_i64(i64) -> i64 { +block0(v0: i64): + v1 = bnot v0 + v2 = popcnt v1 + return v2 +} +; run: %inv_popcnt_i64(1) == 63 +; run: %inv_popcnt_i64(0x4000000000000000) == 63 +; run: %inv_popcnt_i64(-1) == 0 +; run: %inv_popcnt_i64(0) == 64 + +function %popcnt_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = popcnt v0 + return v1 +} +; run: %popcnt_i8x16([1 1 1 1 0x40 0x40 0x40 0x40 0xff 0xff 0xff 0xff 0 0 0 0]) == [1 1 1 1 1 1 1 1 8 8 8 8 0 0 0 0]