diff --git a/build.rs b/build.rs
index 411593c505e7..42d786799f88 100644
--- a/build.rs
+++ b/build.rs
@@ -220,10 +220,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         },
         "Cranelift" => match (testsuite, testname) {
             ("simd", _) if cfg!(feature = "old-x86-backend") => return true, // skip all SIMD tests on old backend.
-            // These are only implemented on x64.
-            ("simd", "simd_i64x2_arith2") | ("simd", "simd_boolean") => {
-                return !platform_is_x64() || cfg!(feature = "old-x86-backend")
-            }
             // These are new instructions that are not really implemented in any backend.
             ("simd", "simd_i8x16_arith2")
             | ("simd", "simd_conversions")
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 1e8ca78317b4..6621e3f4097b 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -427,6 +427,15 @@ fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: R
         | machreg_to_vec(rd.to_reg())
 }
 
+fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
+
+    0b010_11110_11_11000_11011_10_00000_00000
+        | bits_12_16 << 12
+        | machreg_to_vec(rn) << 5
+        | machreg_to_vec(rd.to_reg())
+}
+
 fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
     debug_assert_eq!(q & 0b1, q);
     debug_assert_eq!(u & 0b1, u);
@@ -1628,6 +1637,7 @@ impl MachInstEmit for Inst {
                         debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
                         (0b0, 0b00101, enc_size)
                     }
+                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
                 };
                 sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
             }
@@ -2054,6 +2064,13 @@ impl MachInstEmit for Inst {
                         | machreg_to_vec(rd.to_reg()),
                 );
             }
+            &Inst::VecRRPair { op, rd, rn } => {
+                let bits_12_16 = match op {
+                    VecPairOp::Addp => 0b11011,
+                };
+
+                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 505fd2c86b3f..9f628fced61b 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2311,6 +2311,16 @@ fn test_aarch64_binemit() {
         "sqxtun v16.8b, v23.8h",
     ));
 
+    insns.push((
+        Inst::VecRRPair {
+            op: VecPairOp::Addp,
+            rd: writable_vreg(0),
+            rn: vreg(30),
+        },
+        "C0BBF15E",
+        "addp d0, v30.2d",
+    ));
+
     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Sqadd,
@@ -3803,6 +3813,17 @@ fn test_aarch64_binemit() {
         "cnt v23.8b, v5.8b",
     ));
 
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Cmeq0,
+            rd: writable_vreg(12),
+            rn: vreg(27),
+            size: VectorSize::Size16x8,
+        },
+        "6C9B604E",
+        "cmeq v12.8h, v27.8h, #0",
+    ));
+
     insns.push((
         Inst::VecLanes {
             op: VecLanesOp::Uminv,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index f6a6aa59d0f2..35903c18d06c 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -334,6 +334,8 @@ pub enum VecMisc2 {
     Frintp,
     /// Population count per byte
     Cnt,
+    /// Compare bitwise equal to 0
+    Cmeq0,
 }
 
 /// A Vector narrowing operation with two registers.
@@ -347,6 +349,13 @@ pub enum VecMiscNarrowOp {
     Sqxtun,
 }
 
+/// A vector operation on a pair of elements with one register.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecPairOp {
+    /// Add pair of elements
+    Addp,
+}
+
 /// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum VecLanesOp {
@@ -1011,6 +1020,13 @@ pub enum Inst {
         high_half: bool,
     },
 
+    /// 1-operand vector instruction that operates on a pair of elements.
+    VecRRPair {
+        op: VecPairOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
     /// A vector ALU op.
     VecRRR {
         alu_op: VecALUOp,
@@ -2028,6 +2044,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
                 collector.add_def(rd);
             }
         }
+        &Inst::VecRRPair { rd, rn, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+        }
         &Inst::VecRRR {
             alu_op, rd, rn, rm, ..
         } => {
@@ -2816,6 +2836,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
                 map_def(mapper, rd);
             }
         }
+        &mut Inst::VecRRPair {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+        }
        &mut Inst::VecRRR {
            alu_op,
            ref mut rd,
@@ -3856,6 +3884,15 @@ impl Inst {
                 };
                 format!("{} {}, {}", op, rd, rn)
             }
+            &Inst::VecRRPair { op, rd, rn } => {
+                let op = match op {
+                    VecPairOp::Addp => "addp",
+                };
+                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+                let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2);
+
+                format!("{} {}, {}", op, rd, rn)
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
@@ -3919,43 +3956,44 @@ impl Inst {
                 format!("{} {}, {}, {}", op, rd, rn, rm)
             }
             &Inst::VecMisc { op, rd, rn, size } => {
-                let is_shll = op == VecMisc2::Shll;
-                let suffix = match (is_shll, size) {
-                    (true, VectorSize::Size8x8) => ", #8",
-                    (true, VectorSize::Size16x4) => ", #16",
-                    (true, VectorSize::Size32x2) => ", #32",
-                    _ => "",
-                };
-
-                let (op, size) = match op {
-                    VecMisc2::Not => (
-                        "mvn",
-                        if size.is_128bits() {
+                let (op, rd_size, size, suffix) = match op {
+                    VecMisc2::Not => {
+                        let size = if size.is_128bits() {
                             VectorSize::Size8x16
                         } else {
                             VectorSize::Size8x8
+                        };
+
+                        ("mvn", size, size, "")
+                    }
+                    VecMisc2::Neg => ("neg", size, size, ""),
+                    VecMisc2::Abs => ("abs", size, size, ""),
+                    VecMisc2::Fabs => ("fabs", size, size, ""),
+                    VecMisc2::Fneg => ("fneg", size, size, ""),
+                    VecMisc2::Fsqrt => ("fsqrt", size, size, ""),
+                    VecMisc2::Rev64 => ("rev64", size, size, ""),
+                    VecMisc2::Shll => (
+                        "shll",
+                        size.widen(),
+                        size,
+                        match size {
+                            VectorSize::Size8x8 => ", #8",
+                            VectorSize::Size16x4 => ", #16",
+                            VectorSize::Size32x2 => ", #32",
+                            _ => panic!("Unexpected vector size: {:?}", size),
                         },
                     ),
-                    VecMisc2::Neg => ("neg", size),
-                    VecMisc2::Abs => ("abs", size),
-                    VecMisc2::Fabs => ("fabs", size),
-                    VecMisc2::Fneg => ("fneg", size),
-                    VecMisc2::Fsqrt => ("fsqrt", size),
-                    VecMisc2::Rev64 => ("rev64", size),
-                    VecMisc2::Shll => ("shll", size),
-                    VecMisc2::Fcvtzs => ("fcvtzs", size),
-                    VecMisc2::Fcvtzu => ("fcvtzu", size),
-                    VecMisc2::Scvtf => ("scvtf", size),
-                    VecMisc2::Ucvtf => ("ucvtf", size),
-                    VecMisc2::Frintn => ("frintn", size),
-                    VecMisc2::Frintz => ("frintz", size),
-                    VecMisc2::Frintm => ("frintm", size),
-                    VecMisc2::Frintp => ("frintp", size),
-                    VecMisc2::Cnt => ("cnt", size),
+                    VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""),
+                    VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""),
+                    VecMisc2::Scvtf => ("scvtf", size, size, ""),
+                    VecMisc2::Ucvtf => ("ucvtf", size, size, ""),
+                    VecMisc2::Frintn => ("frintn", size, size, ""),
+                    VecMisc2::Frintz => ("frintz", size, size, ""),
+                    VecMisc2::Frintm => ("frintm", size, size, ""),
+                    VecMisc2::Frintp => ("frintp", size, size, ""),
+                    VecMisc2::Cnt => ("cnt", size, size, ""),
+                    VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"),
                 };
-
-                let rd_size = if is_shll { size.widen() } else { size };
-
                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
                 let rn = show_vreg_vector(rn, mb_rru, size);
                 format!("{} {}, {}{}", op, rd, rn, suffix)
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index be3edd953b8b..ede66295e9b6 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1950,6 +1950,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         }
 
+        Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap();
+
+            // cmeq vtmp.2d, vm.2d, #0
+            // addp dtmp, vtmp.2d
+            // fcmp dtmp, dtmp
+            // cset xd, eq
+            //
+            // Note that after the ADDP the value of the temporary register will
+            // be either 0 when all input elements are true, i.e. non-zero, or a
+            // NaN otherwise (either -1 or -2 when represented as an integer);
+            // NaNs are the only floating-point numbers that compare unequal to
+            // themselves.
+
+            ctx.emit(Inst::VecMisc {
+                op: VecMisc2::Cmeq0,
+                rd: tmp,
+                rn: rm,
+                size: VectorSize::Size64x2,
+            });
+            ctx.emit(Inst::VecRRPair {
+                op: VecPairOp::Addp,
+                rd: tmp,
+                rn: tmp.to_reg(),
+            });
+            ctx.emit(Inst::FpuCmp64 {
+                rn: tmp.to_reg(),
+                rm: tmp.to_reg(),
+            });
+            materialize_bool_result(ctx, insn, rd, Cond::Eq);
+        }
+
         Opcode::VanyTrue | Opcode::VallTrue => {
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -2180,6 +2214,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     size: VectorSize::Size32x4,
                 });
             }
+                I64X2 => {
+                    // mov dst_r, src_v.d[0]
+                    // mov tmp_r0, src_v.d[1]
+                    // lsr dst_r, dst_r, #63
+                    // lsr tmp_r0, tmp_r0, #63
+                    // add dst_r, dst_r, tmp_r0, lsl #1
+                    ctx.emit(Inst::MovFromVec {
+                        rd: dst_r,
+                        rn: src_v,
+                        idx: 0,
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::MovFromVec {
+                        rd: tmp_r0,
+                        rn: src_v,
+                        idx: 1,
+                        size: VectorSize::Size64x2,
+                    });
+                    ctx.emit(Inst::AluRRImmShift {
+                        alu_op: ALUOp::Lsr64,
+                        rd: dst_r,
+                        rn: dst_r.to_reg(),
+                        immshift: ImmShift::maybe_from_u64(63).unwrap(),
+                    });
+                    ctx.emit(Inst::AluRRImmShift {
+                        alu_op: ALUOp::Lsr64,
+                        rd: tmp_r0,
+                        rn: tmp_r0.to_reg(),
+                        immshift: ImmShift::maybe_from_u64(63).unwrap(),
+                    });
+                    ctx.emit(Inst::AluRRRShift {
+                        alu_op: ALUOp::Add32,
+                        rd: dst_r,
+                        rn: dst_r.to_reg(),
+                        rm: tmp_r0.to_reg(),
+                        shiftop: ShiftOpAndAmt::new(
+                            ShiftOp::LSL,
+                            ShiftOpShiftImm::maybe_from_shift(1).unwrap(),
+                        ),
+                    });
+                }
                 _ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty),
             }
         }