From def54fb1fa8196bc69826c36609dbe24ad71d22a Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 16 Jun 2021 14:58:21 +0200 Subject: [PATCH] s390x: Add z14 support * Add support for processor features (including auto-detection). * Move base architecture set requirement back to z14. * Add z15 feature sets and re-enable z15-specific code generation when required features are available. --- Cargo.lock | 1 + cranelift/codegen/meta/src/isa/s390x/mod.rs | 41 ++- cranelift/codegen/src/isa/s390x/inst/emit.rs | 31 +- .../codegen/src/isa/s390x/inst/emit_tests.rs | 9 +- cranelift/codegen/src/isa/s390x/inst/mod.rs | 173 ++++++++++- cranelift/codegen/src/isa/s390x/lower.rs | 272 +++++++++++++----- cranelift/codegen/src/isa/s390x/mod.rs | 2 +- .../filetests/isa/s390x/bitops-arch13.clif | 44 +++ .../filetests/filetests/isa/s390x/bitops.clif | 23 +- .../filetests/isa/s390x/bitwise-arch13.clif | 212 ++++++++++++++ .../filetests/isa/s390x/bitwise.clif | 61 ++-- .../filetests/isa/s390x/fpmem-arch13.clif | 39 +++ .../filetests/filetests/isa/s390x/fpmem.clif | 81 ++++++ cranelift/native/Cargo.toml | 3 + cranelift/native/src/lib.rs | 20 ++ 15 files changed, 895 insertions(+), 117 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif create mode 100644 cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif create mode 100644 cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif create mode 100644 cranelift/filetests/filetests/isa/s390x/fpmem.clif diff --git a/Cargo.lock b/Cargo.lock index 30fa7e662b6e..c412b2086e80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -688,6 +688,7 @@ name = "cranelift-native" version = "0.75.0" dependencies = [ "cranelift-codegen", + "libc", "target-lexicon", ] diff --git a/cranelift/codegen/meta/src/isa/s390x/mod.rs b/cranelift/codegen/meta/src/isa/s390x/mod.rs index 2ec10405535c..b0837dc22db8 100644 --- a/cranelift/codegen/meta/src/isa/s390x/mod.rs +++ b/cranelift/codegen/meta/src/isa/s390x/mod.rs @@ -3,13 
+3,50 @@ use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap use crate::cdsl::isa::TargetIsa; use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::IsaRegsBuilder; -use crate::cdsl::settings::SettingGroupBuilder; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; use crate::shared::Definitions as SharedDefinitions; +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let mut settings = SettingGroupBuilder::new("s390x"); + + // The baseline architecture for cranelift is z14 (arch12), + // so we list only facilities of later processors here. + + // z15 (arch13) facilities + let has_mie2 = settings.add_bool( + "has_mie2", + "Has Miscellaneous-Instruction-Extensions Facility 2 support.", + "", + false, + ); + let has_vxrs_ext2 = settings.add_bool( + "has_vxrs_ext2", + "Has Vector-Enhancements Facility 2 support.", + "", + false, + ); + + // Architecture level presets + settings.add_preset( + "arch13", + "Thirteenth Edition of the z/Architecture.", + preset!(has_mie2 && has_vxrs_ext2), + ); + + // Processor presets + settings.add_preset( + "z15", + "IBM z15 processor.", + preset!(has_mie2 && has_vxrs_ext2), + ); + + settings.build() +} + pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); - let settings = SettingGroupBuilder::new("s390x").build(); + let settings = define_settings(&shared_defs.settings); let regs = IsaRegsBuilder::new().build(); let recipes = Recipes::new(); let encodings_predicates = InstructionPredicateMap::new(); diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index ecc190bfcf26..b06b834c8053 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -5,6 +5,7 @@ use crate::ir::condcodes::IntCC; use crate::ir::MemFlags; use crate::ir::{SourceLoc, TrapCode}; use 
crate::isa::s390x::inst::*; +use crate::isa::s390x::settings as s390x_settings; use core::convert::TryFrom; use log::debug; use regalloc::{Reg, RegClass}; @@ -905,17 +906,20 @@ impl EmitState { } /// Constant state used during function compilation. -pub struct EmitInfo(settings::Flags); +pub struct EmitInfo { + flags: settings::Flags, + isa_flags: s390x_settings::Flags, +} impl EmitInfo { - pub(crate) fn new(flags: settings::Flags) -> Self { - Self(flags) + pub(crate) fn new(flags: settings::Flags, isa_flags: s390x_settings::Flags) -> Self { + Self { flags, isa_flags } } } impl MachInstEmitInfo for EmitInfo { fn flags(&self) -> &settings::Flags { - &self.0 + &self.flags } } @@ -924,6 +928,25 @@ impl MachInstEmit for Inst { type Info = EmitInfo; fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) { + // Verify that we can emit this Inst in the current ISA + let matches_isa_flags = |iset_requirement: &InstructionSet| -> bool { + match iset_requirement { + // Baseline ISA is z14 + InstructionSet::Base => true, + // Miscellaneous-Instruction-Extensions Facility 2 (z15) + InstructionSet::MIE2 => emit_info.isa_flags.has_mie2(), + // Vector-Enhancements Facility 2 (z15) + InstructionSet::VXRS_EXT2 => emit_info.isa_flags.has_vxrs_ext2(), + } + }; + let isa_requirements = self.available_in_isa(); + if !matches_isa_flags(&isa_requirements) { + panic!( + "Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}", + self, isa_requirements + ) + } + // N.B.: we *must* not exceed the "worst-case size" used to compute // where to insert islands, except when islands are explicitly triggered // (with an `EmitIsland`). We check this in debug builds.
This is `mut` diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index 7c62594d55d6..242c62adbe7e 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -1,5 +1,6 @@ use crate::ir::MemFlags; use crate::isa::s390x::inst::*; +use crate::isa::s390x::settings as s390x_settings; use crate::isa::test_utils; use crate::settings; use alloc::vec::Vec; @@ -7767,8 +7768,14 @@ fn test_s390x_binemit() { )); let flags = settings::Flags::new(settings::builder()); + + use crate::settings::Configurable; + let mut isa_flag_builder = s390x_settings::builder(); + isa_flag_builder.enable("arch13").unwrap(); + let isa_flags = s390x_settings::Flags::new(&flags, isa_flag_builder); + let rru = create_reg_universe(&flags); - let emit_info = EmitInfo::new(flags); + let emit_info = EmitInfo::new(flags, isa_flags); for (insn, expected_encoding, expected_printing) in insns { println!( "S390x: {:?}, {}, {}", diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 1df6f6bb468b..c84294906ee1 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -34,6 +34,18 @@ mod emit_tests; //============================================================================= // Instructions (top level): definition +/// Supported instruction sets +#[allow(non_camel_case_types)] +#[derive(Debug)] +pub(crate) enum InstructionSet { + /// Baseline ISA for cranelift is z14. + Base, + /// Miscellaneous-Instruction-Extensions Facility 2 (z15) + MIE2, + /// Vector-Enhancements Facility 2 (z15) + VXRS_EXT2, +} + /// An ALU operation. This can be paired with several instruction formats /// below (see `Inst`) in any combination. 
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] @@ -70,6 +82,17 @@ pub enum ALUOp { XorNot64, } +impl ALUOp { + pub(crate) fn available_from(&self) -> InstructionSet { + match self { + ALUOp::AndNot32 | ALUOp::AndNot64 => InstructionSet::MIE2, + ALUOp::OrrNot32 | ALUOp::OrrNot64 => InstructionSet::MIE2, + ALUOp::XorNot32 | ALUOp::XorNot64 => InstructionSet::MIE2, + _ => InstructionSet::Base, + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum UnaryOp { Abs32, @@ -82,6 +105,15 @@ pub enum UnaryOp { PopcntReg, } +impl UnaryOp { + pub(crate) fn available_from(&self) -> InstructionSet { + match self { + UnaryOp::PopcntReg => InstructionSet::MIE2, + _ => InstructionSet::Base, + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum ShiftOp { RotL32, @@ -941,18 +973,6 @@ pub enum Inst { }, } -fn count_zero_half_words(mut value: u64) -> usize { - let mut count = 0; - for _ in 0..4 { - if value & 0xffff == 0 { - count += 1; - } - value >>= 16; - } - - count -} - #[test] fn inst_size_test() { // This test will help with unintentionally growing the size @@ -961,6 +981,135 @@ fn inst_size_test() { } impl Inst { + /// Retrieve the ISA feature set in which the instruction is available. + fn available_in_isa(&self) -> InstructionSet { + match self { + // These instructions are part of the baseline ISA for cranelift (z14) + Inst::Nop0 + | Inst::Nop2 + | Inst::AluRRSImm16 { .. } + | Inst::AluRR { .. } + | Inst::AluRX { .. } + | Inst::AluRSImm16 { .. } + | Inst::AluRSImm32 { .. } + | Inst::AluRUImm32 { .. } + | Inst::AluRUImm16Shifted { .. } + | Inst::AluRUImm32Shifted { .. } + | Inst::ShiftRR { .. } + | Inst::SMulWide { .. } + | Inst::UMulWide { .. } + | Inst::SDivMod32 { .. } + | Inst::SDivMod64 { .. } + | Inst::UDivMod32 { .. } + | Inst::UDivMod64 { .. } + | Inst::Flogr { .. } + | Inst::CmpRR { .. } + | Inst::CmpRX { .. } + | Inst::CmpRSImm16 { .. } + | Inst::CmpRSImm32 { .. } + | Inst::CmpRUImm32 { .. } + | Inst::CmpTrapRR { .. 
} + | Inst::CmpTrapRSImm16 { .. } + | Inst::CmpTrapRUImm16 { .. } + | Inst::AtomicRmw { .. } + | Inst::AtomicCas32 { .. } + | Inst::AtomicCas64 { .. } + | Inst::Fence + | Inst::Load32 { .. } + | Inst::Load32ZExt8 { .. } + | Inst::Load32SExt8 { .. } + | Inst::Load32ZExt16 { .. } + | Inst::Load32SExt16 { .. } + | Inst::Load64 { .. } + | Inst::Load64ZExt8 { .. } + | Inst::Load64SExt8 { .. } + | Inst::Load64ZExt16 { .. } + | Inst::Load64SExt16 { .. } + | Inst::Load64ZExt32 { .. } + | Inst::Load64SExt32 { .. } + | Inst::LoadRev16 { .. } + | Inst::LoadRev32 { .. } + | Inst::LoadRev64 { .. } + | Inst::Store8 { .. } + | Inst::Store16 { .. } + | Inst::Store32 { .. } + | Inst::Store64 { .. } + | Inst::StoreImm8 { .. } + | Inst::StoreImm16 { .. } + | Inst::StoreImm32SExt16 { .. } + | Inst::StoreImm64SExt16 { .. } + | Inst::StoreRev16 { .. } + | Inst::StoreRev32 { .. } + | Inst::StoreRev64 { .. } + | Inst::LoadMultiple64 { .. } + | Inst::StoreMultiple64 { .. } + | Inst::Mov32 { .. } + | Inst::Mov64 { .. } + | Inst::Mov32Imm { .. } + | Inst::Mov32SImm16 { .. } + | Inst::Mov64SImm16 { .. } + | Inst::Mov64SImm32 { .. } + | Inst::Mov64UImm16Shifted { .. } + | Inst::Mov64UImm32Shifted { .. } + | Inst::Insert64UImm16Shifted { .. } + | Inst::Insert64UImm32Shifted { .. } + | Inst::Extend { .. } + | Inst::CMov32 { .. } + | Inst::CMov64 { .. } + | Inst::CMov32SImm16 { .. } + | Inst::CMov64SImm16 { .. } + | Inst::FpuMove32 { .. } + | Inst::FpuMove64 { .. } + | Inst::FpuCMov32 { .. } + | Inst::FpuCMov64 { .. } + | Inst::MovToFpr { .. } + | Inst::MovFromFpr { .. } + | Inst::FpuRR { .. } + | Inst::FpuRRR { .. } + | Inst::FpuRRRR { .. } + | Inst::FpuCopysign { .. } + | Inst::FpuCmp32 { .. } + | Inst::FpuCmp64 { .. } + | Inst::FpuLoad32 { .. } + | Inst::FpuStore32 { .. } + | Inst::FpuLoad64 { .. } + | Inst::FpuStore64 { .. } + | Inst::LoadFpuConst32 { .. } + | Inst::LoadFpuConst64 { .. } + | Inst::FpuToInt { .. } + | Inst::IntToFpu { .. } + | Inst::FpuRound { .. } + | Inst::FpuVecRRR { .. 
} + | Inst::Call { .. } + | Inst::CallInd { .. } + | Inst::Ret { .. } + | Inst::EpiloguePlaceholder + | Inst::Jump { .. } + | Inst::CondBr { .. } + | Inst::TrapIf { .. } + | Inst::OneWayCondBr { .. } + | Inst::IndirectBr { .. } + | Inst::Debugtrap + | Inst::Trap { .. } + | Inst::JTSequence { .. } + | Inst::LoadExtNameFar { .. } + | Inst::LoadAddr { .. } + | Inst::VirtualSPOffsetAdj { .. } + | Inst::ValueLabelMarker { .. } + | Inst::Unwind { .. } => InstructionSet::Base, + + // These depend on the opcode + Inst::AluRRR { alu_op, .. } => alu_op.available_from(), + Inst::UnaryRR { op, .. } => op.available_from(), + + // These are all part of VXRS_EXT2 + Inst::FpuLoadRev32 { .. } + | Inst::FpuStoreRev32 { .. } + | Inst::FpuLoadRev64 { .. } + | Inst::FpuStoreRev64 { .. } => InstructionSet::VXRS_EXT2, + } + } + /// Create a 64-bit move instruction. pub fn mov64(to_reg: Writable, from_reg: Reg) -> Inst { assert!(to_reg.to_reg().get_class() == from_reg.get_class()); diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index ce79d2f4b693..c5b621bdcab0 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -5,6 +5,7 @@ use crate::ir::Inst as IRInst; use crate::ir::{types, Endianness, InstructionData, MemFlags, Opcode, TrapCode, Type}; use crate::isa::s390x::abi::*; use crate::isa::s390x::inst::*; +use crate::isa::s390x::settings as s390x_settings; use crate::isa::s390x::S390xBackend; use crate::machinst::lower::*; use crate::machinst::*; @@ -548,6 +549,70 @@ fn lower_constant_f64>(ctx: &mut C, rd: Writable, val ctx.emit(Inst::load_fp_constant64(rd, value)); } +//============================================================================ +// Lowering: miscellaneous helpers. + +/// Emit code to invert the value of type ty in register rd. 
+fn lower_bnot<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, rd: Writable<Reg>) { + let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64); + ctx.emit(Inst::AluRUImm32Shifted { + alu_op, + rd, + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }); + if ty_bits(ty) > 32 { + ctx.emit(Inst::AluRUImm32Shifted { + alu_op, + rd, + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }); + } +} + +/// Emit code to bitcast between integer and floating-point values. +fn lower_bitcast<C: LowerCtx<I = Inst>>( + ctx: &mut C, + rd: Writable<Reg>, + output_ty: Type, + rn: Reg, + input_ty: Type, +) { + match (input_ty, output_ty) { + (types::I64, types::F64) => { + ctx.emit(Inst::MovToFpr { rd, rn }); + } + (types::F64, types::I64) => { + ctx.emit(Inst::MovFromFpr { rd, rn }); + } + (types::I32, types::F32) => { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: tmp, + rn, + shift_imm: SImm20::maybe_from_i64(32).unwrap(), + shift_reg: None, + }); + ctx.emit(Inst::MovToFpr { + rd, + rn: tmp.to_reg(), + }); + } + (types::F32, types::I32) => { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::MovFromFpr { rd: tmp, rn }); + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd, + rn: tmp.to_reg(), + shift_imm: SImm20::maybe_from_i64(32).unwrap(), + shift_reg: None, + }); + } + _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty), + } +} + //============================================================================= // Lowering: comparisons @@ -760,6 +825,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( ctx: &mut C, insn: IRInst, flags: &Flags, + isa_flags: &s390x_settings::Flags, ) -> CodegenResult<()> { let op = ctx.data(insn).opcode(); let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) @@ -1447,15 +1513,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( Opcode::Bnot => { let ty = ty.unwrap(); - let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64); let rd = get_output_reg(ctx,
outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - ctx.emit(Inst::AluRRR { - alu_op, - rd, - rn, - rm: rn, - }); + if isa_flags.has_mie2() { + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), + rd, + rn, + rm: rn, + }); + } else { + ctx.emit(Inst::gen_move(rd, rn, ty)); + lower_bnot(ctx, ty, rd); + } } Opcode::Band => { @@ -1517,16 +1587,22 @@ fn lower_insn_to_regs>( Opcode::BandNot | Opcode::BorNot | Opcode::BxorNot => { let ty = ty.unwrap(); - let alu_op = match op { - Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), - Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), - Opcode::BxorNot => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64), + let alu_op = match (op, isa_flags.has_mie2()) { + (Opcode::BandNot, true) => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), + (Opcode::BorNot, true) => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), + (Opcode::BxorNot, true) => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64), + (Opcode::BandNot, false) => choose_32_64(ty, ALUOp::And32, ALUOp::And64), + (Opcode::BorNot, false) => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), + (Opcode::BxorNot, false) => choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), _ => unreachable!(), }; let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + if !isa_flags.has_mie2() { + lower_bnot(ctx, ty, rd); + } } Opcode::Bitselect => { @@ -1542,12 +1618,22 @@ fn lower_insn_to_regs>( rn, rm: rcond, }); - ctx.emit(Inst::AluRRR { - alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), - rd, - rn: rm, - rm: rcond, - }); + if isa_flags.has_mie2() { + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), + rd, + rn: rm, + rm: rcond, + }); + } else { 
+ ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), + rd, + rn: rm, + rm: rcond, + }); + lower_bnot(ctx, ty, rd); + } ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), rd, @@ -1804,13 +1890,45 @@ fn lower_insn_to_regs>( rd, rn, }); - } else { + } else if isa_flags.has_mie2() { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); ctx.emit(Inst::UnaryRR { op: UnaryOp::PopcntReg, rd, rn, }); + } else { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::UnaryRR { + op: UnaryOp::PopcntByte, + rd, + rn, + }); + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + let mut shift = ty_bits(ty) as u8; + while shift > 8 { + shift = shift / 2; + ctx.emit(Inst::ShiftRR { + shift_op: choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64), + rd: tmp, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(shift.into()).unwrap(), + shift_reg: None, + }); + ctx.emit(Inst::AluRR { + alu_op: choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), + rd, + rm: tmp.to_reg(), + }); + } + let shift = ty_bits(ty) as u8 - 8; + ctx.emit(Inst::ShiftRR { + shift_op: choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64), + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(shift.into()).unwrap(), + shift_reg: None, + }); } } @@ -2027,40 +2145,7 @@ fn lower_insn_to_regs>( let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let input_ty = ctx.input_ty(insn, 0); let output_ty = ctx.output_ty(insn, 0); - match (input_ty, output_ty) { - (types::I64, types::F64) => { - ctx.emit(Inst::MovToFpr { rd, rn }); - } - (types::F64, types::I64) => { - ctx.emit(Inst::MovFromFpr { rd, rn }); - } - (types::I32, types::F32) => { - let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - ctx.emit(Inst::ShiftRR { - shift_op: ShiftOp::LShL64, - rd: tmp, - rn, - shift_imm: SImm20::maybe_from_i64(32).unwrap(), - shift_reg: None, - }); - ctx.emit(Inst::MovToFpr { - rd, - rn: tmp.to_reg(), - }); - 
} - (types::F32, types::I32) => { - let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - ctx.emit(Inst::MovFromFpr { rd: tmp, rn }); - ctx.emit(Inst::ShiftRR { - shift_op: ShiftOp::LShR64, - rd, - rn: tmp.to_reg(), - shift_imm: SImm20::maybe_from_i64(32).unwrap(), - shift_reg: None, - }); - } - _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty), - } + lower_bitcast(ctx, rd, output_ty, rn, input_ty); } Opcode::Load @@ -2130,21 +2215,18 @@ fn lower_insn_to_regs>( (64, 32, true, _) => Inst::Load64SExt32 { rd, mem }, _ => panic!("Unsupported size in load"), }); - } else { - ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) { - (32, 32, _, true) => Inst::FpuLoadRev32 { rd, mem }, - (64, 64, _, true) => Inst::FpuLoadRev64 { rd, mem }, - (_, 16, _, false) => Inst::LoadRev16 { rd, mem }, - (_, 32, _, false) => Inst::LoadRev32 { rd, mem }, - (_, 64, _, false) => Inst::LoadRev64 { rd, mem }, - (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem }, - (32, 8, true, _) => Inst::Load32SExt8 { rd, mem }, - (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem }, - (64, 8, true, _) => Inst::Load64SExt8 { rd, mem }, + } else if !is_float { + ctx.emit(match (ext_bits, from_bits, sign_extend) { + (_, 16, _) => Inst::LoadRev16 { rd, mem }, + (_, 32, _) => Inst::LoadRev32 { rd, mem }, + (_, 64, _) => Inst::LoadRev64 { rd, mem }, + (32, 8, false) => Inst::Load32ZExt8 { rd, mem }, + (32, 8, true) => Inst::Load32SExt8 { rd, mem }, + (64, 8, false) => Inst::Load64ZExt8 { rd, mem }, + (64, 8, true) => Inst::Load64SExt8 { rd, mem }, _ => panic!("Unsupported size in load"), }); if to_bits > from_bits && from_bits > 8 { - assert!(is_float == false); ctx.emit(Inst::Extend { rd, rn: rd.to_reg(), @@ -2153,6 +2235,26 @@ fn lower_insn_to_regs>( to_bits: to_bits as u8, }); } + } else if isa_flags.has_vxrs_ext2() { + ctx.emit(match from_bits { + 32 => Inst::FpuLoadRev32 { rd, mem }, + 64 => Inst::FpuLoadRev64 { rd, mem }, + _ => panic!("Unsupported size in load"), + 
}); + } else { + match from_bits { + 32 => { + let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap(); + ctx.emit(Inst::LoadRev32 { rd: tmp, mem }); + lower_bitcast(ctx, rd, elem_ty, tmp.to_reg(), types::I32); + } + 64 => { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::LoadRev64 { rd: tmp, mem }); + lower_bitcast(ctx, rd, elem_ty, tmp.to_reg(), types::I64); + } + _ => panic!("Unsupported size in load"), + } } } @@ -2179,13 +2281,39 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( if ty_is_float(elem_ty) { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - ctx.emit(match (endianness, ty_bits(elem_ty)) { - (Endianness::Big, 32) => Inst::FpuStore32 { rd, mem }, - (Endianness::Big, 64) => Inst::FpuStore64 { rd, mem }, - (Endianness::Little, 32) => Inst::FpuStoreRev32 { rd, mem }, - (Endianness::Little, 64) => Inst::FpuStoreRev64 { rd, mem }, - _ => panic!("Unsupported size in store"), - }); + if endianness == Endianness::Big { + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::FpuStore32 { rd, mem }, + 64 => Inst::FpuStore64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } else if isa_flags.has_vxrs_ext2() { + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::FpuStoreRev32 { rd, mem }, + 64 => Inst::FpuStoreRev64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } else { + match ty_bits(elem_ty) { + 32 => { + let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap(); + lower_bitcast(ctx, tmp, types::I32, rd, elem_ty); + ctx.emit(Inst::StoreRev32 { + rd: tmp.to_reg(), + mem, + }); + } + 64 => { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + lower_bitcast(ctx, tmp, types::I64, rd, elem_ty); + ctx.emit(Inst::StoreRev64 { + rd: tmp.to_reg(), + mem, + }); + } + _ => panic!("Unsupported size in store"), + } + } } else if ty_bits(elem_ty) <= 16 { if let Some(imm) = input_matches_const(ctx, inputs[0]) { ctx.emit(match (endianness, ty_bits(elem_ty)) { @@ -2980,7 +3108,7 @@ impl LowerBackend for S390xBackend {
type MInst = Inst; fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { - lower_insn_to_regs(ctx, ir_inst, &self.flags) + lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags) } fn lower_branch_group<C: LowerCtx<I = Inst>>( diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index c13ac987f72c..50929c285581 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -57,7 +57,7 @@ impl S390xBackend { func: &Function, flags: shared_settings::Flags, ) -> CodegenResult<VCode<inst::Inst>> { - let emit_info = EmitInfo::new(flags.clone()); + let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); let abi = Box::new(abi::S390xABICallee::new(func, flags)?); compile::compile::<S390xBackend>(func, self, abi, emit_info) } diff --git a/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif b/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif new file mode 100644 index 000000000000..4350ab053d4f --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif @@ -0,0 +1,44 @@ +test compile +target s390x arch13 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; POPCNT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %popcnt_i64(i64) -> i64 { +block0(v0: i64): + v1 = popcnt v0 + return v1 +} + +; check: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i32(i32) -> i32 { +block0(v0: i32): + v1 = popcnt v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i16(i16) -> i16 { +block0(v0: i16): + v1 = popcnt v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i8(i8) -> i8 { +block0(v0: i8): + v1 = popcnt v0 + return v1 +} + +; check: popcnt %r2, %r2 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif index e4ab2f1f4be9..3cab149939b3 100644 ---
a/cranelift/filetests/filetests/isa/s390x/bitops.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -210,7 +210,14 @@ block0(v0: i64): return v1 } -; check: popcnt %r2, %r2, 8 +; check: popcnt %r2, %r2 +; nextln: sllg %r3, %r2, 32 +; nextln: agr %r2, %r3 +; nextln: sllg %r3, %r2, 16 +; nextln: agr %r2, %r3 +; nextln: sllg %r3, %r2, 8 +; nextln: agr %r2, %r3 +; nextln: srlg %r2, %r2, 56 ; nextln: br %r14 function %popcnt_i32(i32) -> i32 { @@ -219,8 +226,12 @@ block0(v0: i32): return v1 } -; check: llgfr %r2, %r2 -; nextln: popcnt %r2, %r2, 8 +; check: popcnt %r2, %r2 +; nextln: sllk %r3, %r2, 16 +; nextln: ar %r2, %r3 +; nextln: sllk %r3, %r2, 8 +; nextln: ar %r2, %r3 +; nextln: srlk %r2, %r2, 24 ; nextln: br %r14 function %popcnt_i16(i16) -> i16 { @@ -229,8 +240,10 @@ block0(v0: i16): return v1 } -; check: llghr %r2, %r2 -; nextln: popcnt %r2, %r2, 8 +; check: popcnt %r2, %r2 +; nextln: sllk %r3, %r2, 8 +; nextln: ar %r2, %r3 +; nextln: srlk %r2, %r2, 8 ; nextln: br %r14 function %popcnt_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif b/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif new file mode 100644 index 000000000000..5630fee0dc5f --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif @@ -0,0 +1,212 @@ + +test compile +target s390x arch13 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BAND_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %band_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band_not.i64 v0, v1 + return v2 +} + +; check: nngrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band_not.i32 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = band_not.i16 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i8(i8, i8) -> i8 { 
+block0(v0: i8, v1: i8): + v2 = band_not.i8 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BOR_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor_not.i64 v0, v1 + return v2 +} + +; check: nogrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor_not.i32 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bor_not.i16 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bor_not.i8 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BXOR_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bxor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor_not.i64 v0, v1 + return v2 +} + +; check: nxgrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor_not.i32 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bxor_not.i16 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bxor_not.i8 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BNOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bnot_i64(i64) -> i64 { +block0(v0: i64): + v1 = bnot.i64 v0 + return v1 +} + +; check: nogrk %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i32(i32) -> i32 { +block0(v0: i32): + v1 = bnot.i32 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + 
+function %bnot_i16(i16) -> i16 { +block0(v0: i16): + v1 = bnot.i16 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i8(i8) -> i8 { +block0(v0: i8): + v1 = bnot.i8 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITSELECT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bitselect_i64(i64, i64, i64) -> i64 { +block0(v0: i64, v1: i64, v2: i64): + v3 = bitselect.i64 v0, v1, v2 + return v3 +} + +; check: ngr %r3, %r2 +; nextln: nngrk %r2, %r4, %r2 +; nextln: ogr %r2, %r3 +; nextln: br %r14 + +function %bitselect_i32(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = bitselect.i32 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bitselect_i16(i16, i16, i16) -> i16 { +block0(v0: i16, v1: i16, v2: i16): + v3 = bitselect.i16 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bitselect_i8(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = bitselect.i8 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index dd1e75cf2f65..205c69230033 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -290,7 +290,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: nngrk %r2, %r2, %r3 +; check: ngr %r2, %r3 +; nextln: xilf %r2, 4294967295 +; nextln: xihf %r2, 4294967295 ; nextln: br %r14 function %band_not_i32(i32, i32) -> i32 { @@ -299,7 +301,8 @@ block0(v0: i32, v1: i32): return v2 } -; check: nnrk %r2, %r2, %r3 +; check: nr %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 function %band_not_i16(i16, i16) -> 
i16 { @@ -308,7 +311,8 @@ block0(v0: i16, v1: i16): return v2 } -; check: nnrk %r2, %r2, %r3 +; check: nr %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 function %band_not_i8(i8, i8) -> i8 { @@ -317,7 +321,8 @@ block0(v0: i8, v1: i8): return v2 } -; check: nnrk %r2, %r2, %r3 +; check: nr %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -330,7 +335,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: nogrk %r2, %r2, %r3 +; check: ogr %r2, %r3 +; nextln: xilf %r2, 4294967295 +; nextln: xihf %r2, 4294967295 ; nextln: br %r14 function %bor_not_i32(i32, i32) -> i32 { @@ -339,7 +346,8 @@ block0(v0: i32, v1: i32): return v2 } -; check: nork %r2, %r2, %r3 +; check: or %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 function %bor_not_i16(i16, i16) -> i16 { @@ -348,7 +356,8 @@ block0(v0: i16, v1: i16): return v2 } -; check: nork %r2, %r2, %r3 +; check: or %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 function %bor_not_i8(i8, i8) -> i8 { @@ -357,7 +366,8 @@ block0(v0: i8, v1: i8): return v2 } -; check: nork %r2, %r2, %r3 +; check: or %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -370,7 +380,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: nxgrk %r2, %r2, %r3 +; check: xgr %r2, %r3 +; nextln: xilf %r2, 4294967295 +; nextln: xihf %r2, 4294967295 ; nextln: br %r14 function %bxor_not_i32(i32, i32) -> i32 { @@ -379,7 +391,8 @@ block0(v0: i32, v1: i32): return v2 } -; check: nxrk %r2, %r2, %r3 +; check: xr %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 function %bxor_not_i16(i16, i16) -> i16 { @@ -388,7 +401,8 @@ block0(v0: i16, v1: i16): return v2 } -; check: nxrk %r2, %r2, %r3 +; check: xr %r2, %r3 +; nextln: xilf %r2, 4294967295 ; nextln: br %r14 function %bxor_not_i8(i8, i8) -> i8 { @@ -397,7 +411,8 @@ block0(v0: i8, v1: i8): return v2 } -; check: nxrk %r2, %r2, %r3 +; check: xr %r2, %r3 +; nextln: xilf %r2, 
4294967295 ; nextln: br %r14 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -410,7 +425,8 @@ block0(v0: i64): return v1 } -; check: nogrk %r2, %r2, %r2 +; check: xilf %r2, 4294967295 +; nextln: xihf %r2, 4294967295 ; nextln: br %r14 function %bnot_i32(i32) -> i32 { @@ -419,7 +435,7 @@ block0(v0: i32): return v1 } -; check: nork %r2, %r2, %r2 +; check: xilf %r2, 4294967295 ; nextln: br %r14 function %bnot_i16(i16) -> i16 { @@ -428,7 +444,7 @@ block0(v0: i16): return v1 } -; check: nork %r2, %r2, %r2 +; check: xilf %r2, 4294967295 ; nextln: br %r14 function %bnot_i8(i8) -> i8 { @@ -437,7 +453,7 @@ block0(v0: i8): return v1 } -; check: nork %r2, %r2, %r2 +; check: xilf %r2, 4294967295 ; nextln: br %r14 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -451,7 +467,9 @@ block0(v0: i64, v1: i64, v2: i64): } ; check: ngr %r3, %r2 -; nextln: nngrk %r2, %r4, %r2 +; nextln: ngrk %r2, %r4, %r2 +; nextln: xilf %r2, 4294967295 +; nextln: xihf %r2, 4294967295 ; nextln: ogr %r2, %r3 ; nextln: br %r14 @@ -462,7 +480,8 @@ block0(v0: i32, v1: i32, v2: i32): } ; check: nr %r3, %r2 -; nextln: nnrk %r2, %r4, %r2 +; nextln: nrk %r2, %r4, %r2 +; nextln: xilf %r2, 4294967295 ; nextln: or %r2, %r3 ; nextln: br %r14 @@ -473,7 +492,8 @@ block0(v0: i16, v1: i16, v2: i16): } ; check: nr %r3, %r2 -; nextln: nnrk %r2, %r4, %r2 +; nextln: nrk %r2, %r4, %r2 +; nextln: xilf %r2, 4294967295 ; nextln: or %r2, %r3 ; nextln: br %r14 @@ -484,7 +504,8 @@ block0(v0: i8, v1: i8, v2: i8): } ; check: nr %r3, %r2 -; nextln: nnrk %r2, %r4, %r2 +; nextln: nrk %r2, %r4, %r2 +; nextln: xilf %r2, 4294967295 ; nextln: or %r2, %r3 ; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif b/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif new file mode 100644 index 000000000000..8bad0ca09a7c --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif @@ -0,0 +1,39 @@ +test compile +target s390x arch13 + +function %load_f64_little(i64) -> f64 { +block0(v0: i64): + 
v1 = load.f64 little v0 + return v1 +} + +; check: vlebrg %f0, 0(%r2), 0 +; nextln: br %r14 + +function %load_f32_little(i64) -> f32 { +block0(v0: i64): + v1 = load.f32 little v0 + return v1 +} + +; check: vlebrf %f0, 0(%r2), 0 +; nextln: br %r14 + +function %store_f64_little(f64, i64) { +block0(v0: f64, v1: i64): + store.f64 little v0, v1 + return +} + +; check: vstebrg %f0, 0(%r2), 0 +; nextln: br %r14 + +function %store_f32_little(f32, i64) { +block0(v0: f32, v1: i64): + store.f32 little v0, v1 + return +} + +; check: vstebrf %f0, 0(%r2), 0 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem.clif b/cranelift/filetests/filetests/isa/s390x/fpmem.clif new file mode 100644 index 000000000000..ddf37f84f21b --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/fpmem.clif @@ -0,0 +1,81 @@ +test compile +target s390x + +function %load_f64(i64) -> f64 { +block0(v0: i64): + v1 = load.f64 v0 + return v1 +} + +; check: ld %f0, 0(%r2) +; nextln: br %r14 + +function %load_f32(i64) -> f32 { +block0(v0: i64): + v1 = load.f32 v0 + return v1 +} + +; check: le %f0, 0(%r2) +; nextln: br %r14 + +function %load_f64_little(i64) -> f64 { +block0(v0: i64): + v1 = load.f64 little v0 + return v1 +} + +; check: lrvg %r2, 0(%r2) +; nextln: ldgr %f0, %r2 +; nextln: br %r14 + +function %load_f32_little(i64) -> f32 { +block0(v0: i64): + v1 = load.f32 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: sllg %r2, %r2, 32 +; nextln: ldgr %f0, %r2 +; nextln: br %r14 + +function %store_f64(f64, i64) { +block0(v0: f64, v1: i64): + store.f64 v0, v1 + return +} + +; check: std %f0, 0(%r2) +; nextln: br %r14 + +function %store_f32(f32, i64) { +block0(v0: f32, v1: i64): + store.f32 v0, v1 + return +} + +; check: ste %f0, 0(%r2) +; nextln: br %r14 + +function %store_f64_little(f64, i64) { +block0(v0: f64, v1: i64): + store.f64 little v0, v1 + return +} + +; check: lgdr %r3, %f0 +; nextln: strvg %r3, 0(%r2) +; nextln: br %r14 + +function %store_f32_little(f32, 
i64) { +block0(v0: f32, v1: i64): + store.f32 little v0, v1 + return +} + +; check: lgdr %r3, %f0 +; nextln: srlg %r3, %r3, 32 +; nextln: strv %r3, 0(%r2) +; nextln: br %r14 + diff --git a/cranelift/native/Cargo.toml b/cranelift/native/Cargo.toml index 6186e87d05b3..d7ffe7d97865 100644 --- a/cranelift/native/Cargo.toml +++ b/cranelift/native/Cargo.toml @@ -14,6 +14,9 @@ edition = "2018" cranelift-codegen = { path = "../codegen", version = "0.75.0", default-features = false } target-lexicon = "0.12" +[target.'cfg(target_arch = "s390x")'.dependencies] +libc = "0.2.95" + [features] default = ["std"] std = ["cranelift-codegen/std"] diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index 80af5f13a2ca..cac50dee4ec2 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -125,6 +125,26 @@ pub fn builder_with_options( } } + // There is no is_s390x_feature_detected macro yet, so for now + // we use getauxval from the libc crate directly. + #[cfg(all(target_arch = "s390x", target_os = "linux"))] + { + use cranelift_codegen::settings::Configurable; + + if !infer_native_flags { + return Ok(isa_builder); + } + + let v = unsafe { libc::getauxval(libc::AT_HWCAP) }; + const HWCAP_S390X_VXRS_EXT2: libc::c_ulong = 32768; + if (v & HWCAP_S390X_VXRS_EXT2) != 0 { + isa_builder.enable("has_vxrs_ext2").unwrap(); + // There is no separate HWCAP bit for mie2, so assume + // that any machine with vxrs_ext2 also has mie2. + isa_builder.enable("has_mie2").unwrap(); + } + } + // squelch warnings about unused mut/variables on some platforms. drop(&mut isa_builder); drop(infer_native_flags);