diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 25c83eede65a..8f242e22dcc3 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -613,9 +613,29 @@ pub(crate) fn lower_insn_to_regs>( let to_bits = ty_bits(output_ty) as u8; let to_bits = std::cmp::max(32, to_bits); assert!(from_bits <= to_bits); - if from_bits < to_bits { - let signed = op == Opcode::Sextend; - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + let signed = op == Opcode::Sextend; + let dst = get_output_reg(ctx, outputs[0]); + let src = + if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) { + put_input_in_regs( + ctx, + InsnInput { + insn: extract_insn, + input: 0, + }, + ) + } else { + put_input_in_regs(ctx, inputs[0]) + }; + + let needs_extend = from_bits < to_bits && to_bits <= 64; + // For i128, we want to extend the lower half, except if it is already 64 bits. 
+ let needs_lower_extend = to_bits > 64 && from_bits < 64; + + if needs_extend || needs_lower_extend { + let rn = src.regs()[0]; + let rd = dst.regs()[0]; if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) { let idx = @@ -624,11 +644,7 @@ pub(crate) fn lower_insn_to_regs>( } else { unreachable!(); }; - let input = InsnInput { - insn: extract_insn, - input: 0, - }; - let rn = put_input_in_reg(ctx, input, NarrowValueMode::None); + let size = VectorSize::from_ty(ctx.input_ty(extract_insn, 0)); if signed { @@ -654,10 +670,28 @@ pub(crate) fn lower_insn_to_regs>( rn, signed, from_bits, - to_bits, + to_bits: if to_bits > 64 { 64 } else { to_bits }, }); } } + + if output_ty == I128 { + // We didn't extend the lower reg above, so we just move it + if !needs_lower_extend { + ctx.emit(Inst::gen_move(dst.regs()[0], src.regs()[0], I64)); + } + + if signed { + ctx.emit(Inst::AluRRImmShift { + alu_op: ALUOp::Asr64, + rd: dst.regs()[1], + rn: dst.regs()[0].to_reg(), + immshift: ImmShift::maybe_from_u64(63).unwrap(), + }); + } else { + lower_constant_u64(ctx, dst.regs()[1], 0); + } + } } Opcode::Bnot => { diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif index 028a62c5c9fe..c64f64af64ce 100644 --- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif +++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif @@ -16,3 +16,109 @@ block0(v0: i8): ; nextln: add x0, x1, x0, SXTB ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + + +function %i128_uextend_i64(i64) -> i128 { +block0(v0: i64): + v1 = uextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movz x1, #0 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_sextend_i64(i64) -> i128 { +block0(v0: i64): + v1 = sextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: asr x1, x0, #63 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_uextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = uextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: mov w0, w0 +; nextln: movz x1, #0 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_sextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = sextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sxtw x0, w0 +; nextln: asr x1, x0, #63 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_uextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = uextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: uxth w0, w0 +; nextln: movz x1, #0 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_sextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = sextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sxth x0, w0 +; nextln: asr x1, x0, #63 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %i128_uextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = uextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: uxtb w0, w0 +; nextln: movz x1, #0 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %i128_sextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = sextend.i128 v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! 
+; nextln: mov fp, sp +; nextln: sxtb x0, w0 +; nextln: asr x1, x0, #63 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/filetests/filetests/runtests/i128-extend-2.clif b/cranelift/filetests/filetests/runtests/i128-extend-2.clif new file mode 100644 index 000000000000..906d699bbc68 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-extend-2.clif @@ -0,0 +1,69 @@ +test run +target aarch64 +target x86_64 machinst +; TODO: Merge this file with i128-extend once the x86 legacy backend is removed + +function %i128_uextend_i32(i32) -> i64, i64 { +block0(v0: i32): + v1 = uextend.i128 v0 + v2, v3 = isplit v1 + return v2, v3 +} +; run: %i128_uextend_i32(0) == [0, 0] +; run: %i128_uextend_i32(-1) == [0xffff_ffff, 0] +; run: %i128_uextend_i32(0xffff_eeee) == [0xffff_eeee, 0] + +function %i128_sextend_i32(i32) -> i64, i64 { +block0(v0: i32): + v1 = sextend.i128 v0 + v2, v3 = isplit v1 + return v2, v3 +} +; run: %i128_sextend_i32(0) == [0, 0] +; run: %i128_sextend_i32(-1) == [-1, -1] +; run: %i128_sextend_i32(0x7fff_ffff) == [0x7fff_ffff, 0x0000_0000_0000_0000] +; run: %i128_sextend_i32(0xffff_eeee) == [0xffff_ffff_ffff_eeee, 0xffff_ffff_ffff_ffff] + + +function %i128_uextend_i16(i16) -> i64, i64 { +block0(v0: i16): + v1 = uextend.i128 v0 + v2, v3 = isplit v1 + return v2, v3 +} +; run: %i128_uextend_i16(0) == [0, 0] +; run: %i128_uextend_i16(-1) == [0xffff, 0] +; run: %i128_uextend_i16(0xffee) == [0xffee, 0] + +function %i128_sextend_i16(i16) -> i64, i64 { +block0(v0: i16): + v1 = sextend.i128 v0 + v2, v3 = isplit v1 + return v2, v3 +} +; run: %i128_sextend_i16(0) == [0, 0] +; run: %i128_sextend_i16(-1) == [-1, -1] +; run: %i128_sextend_i16(0x7fff) == [0x7fff, 0x0000_0000_0000_0000] +; run: %i128_sextend_i16(0xffee) == [0xffff_ffff_ffff_ffee, 0xffff_ffff_ffff_ffff] + + +function %i128_uextend_i8(i8) -> i64, i64 { +block0(v0: i8): + v1 = uextend.i128 v0 + v2, v3 = isplit v1 + return v2, v3 +} +; run: %i128_uextend_i8(0) == [0, 0] +; run: 
%i128_uextend_i8(-1) == [0xff, 0] +; run: %i128_uextend_i8(0xfe) == [0xfe, 0] + +function %i128_sextend_i8(i8) -> i64, i64 { +block0(v0: i8): + v1 = sextend.i128 v0 + v2, v3 = isplit v1 + return v2, v3 +} +; run: %i128_sextend_i8(0) == [0, 0] +; run: %i128_sextend_i8(-1) == [-1, -1] +; run: %i128_sextend_i8(0x7f) == [0x7f, 0x0000_0000_0000_0000] +; run: %i128_sextend_i8(0xfe) == [0xffff_ffff_ffff_fffe, 0xffff_ffff_ffff_ffff] diff --git a/cranelift/filetests/filetests/runtests/i128-extend.clif b/cranelift/filetests/filetests/runtests/i128-extend.clif index 2e77be6cd4a2..57263be68e7b 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend.clif +++ b/cranelift/filetests/filetests/runtests/i128-extend.clif @@ -1,26 +1,26 @@ test run -; target aarch64 TODO: Not yet implemented on aarch64 +target aarch64 ; target s390x TODO: Not yet implemented on s390x target x86_64 machinst target x86_64 legacy -function %i128_uextend(i64) -> i64, i64 { +function %i128_uextend_i64(i64) -> i64, i64 { block0(v0: i64): v1 = uextend.i128 v0 v2, v3 = isplit v1 return v2, v3 } -; run: %i128_uextend(0) == [0, 0] -; run: %i128_uextend(-1) == [-1, 0] -; run: %i128_uextend(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0] +; run: %i128_uextend_i64(0) == [0, 0] +; run: %i128_uextend_i64(-1) == [-1, 0] +; run: %i128_uextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0] -function %i128_sextend(i64) -> i64, i64 { +function %i128_sextend_i64(i64) -> i64, i64 { block0(v0: i64): v1 = sextend.i128 v0 v2, v3 = isplit v1 return v2, v3 } -; run: %i128_sextend(0) == [0, 0] -; run: %i128_sextend(-1) == [-1, -1] -; run: %i128_sextend(0x7fff_ffff_ffff_ffff) == [0x7fff_ffff_ffff_ffff, 0x0000_0000_0000_0000] -; run: %i128_sextend(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0xffff_ffff_ffff_ffff] +; run: %i128_sextend_i64(0) == [0, 0] +; run: %i128_sextend_i64(-1) == [-1, -1] +; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == [0x7fff_ffff_ffff_ffff, 0x0000_0000_0000_0000] +; run: 
%i128_sextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0xffff_ffff_ffff_ffff]