From 6c438d4b64736c34292e94392cca350b2bf5b9bd Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 25 Sep 2023 15:35:23 +0100 Subject: [PATCH] riscv64: Add the remainder of Zca and Zcd instructions (#7080) * riscv64: Add `c.li` and `c.lui` * riscv64: Add CB type instructions `c.srli` / `c.srai` / `c.andi` * riscv64: Add `sp` relative load instructions * riscv64: Return Option from try_emit_compressed * riscv64: Implement stack based stores * riscv64: Add compressed stores * riscv64: Add compressed loads --- cranelift/codegen/src/isa/riscv64/inst.isle | 32 ++ .../codegen/src/isa/riscv64/inst/args.rs | 111 +++++- .../codegen/src/isa/riscv64/inst/emit.rs | 235 ++++++++++- .../codegen/src/isa/riscv64/inst/encode.rs | 166 +++++++- .../codegen/src/isa/riscv64/inst/imms.rs | 64 +++ .../filetests/filetests/isa/riscv64/zca.clif | 377 +++++++++++++++++- .../filetests/filetests/isa/riscv64/zcd.clif | 114 ++++++ 7 files changed, 1058 insertions(+), 41 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/zcd.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index f7f92fc20d54..aece273057ca 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -741,6 +741,11 @@ (CAddiw) (CAddi16sp) (CSlli) + (CLi) + (CLui) + (CLwsp) + (CLdsp) + (CFldsp) )) ;; Opcodes for the CIW compressed instruction format @@ -748,6 +753,33 @@ (CAddi4spn) )) +;; Opcodes for the CB compressed instruction format +(type CbOp (enum + (CSrli) + (CSrai) + (CAndi) +)) + +;; Opcodes for the CSS compressed instruction format +(type CssOp (enum + (CSwsp) + (CSdsp) + (CFsdsp) +)) + +;; Opcodes for the CS compressed instruction format +(type CsOp (enum + (CSw) + (CSd) + (CFsd) +)) + +;; Opcodes for the CL compressed instruction format +(type ClOp (enum + (CLw) + (CLd) + (CFld) +)) (type CsrRegOP (enum ;; Atomic Read/Write CSR diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs 
b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 469dc531e20c..a75b07bfdfb8 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -8,7 +8,7 @@ use crate::ir::condcodes::CondCode; use crate::isa::riscv64::inst::{reg_name, reg_to_gpr_num}; use crate::isa::riscv64::lower::isle::generated_code::{ - COpcodeSpace, CaOp, CiOp, CiwOp, CjOp, CrOp, + COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, CjOp, ClOp, CrOp, CsOp, CssOp, }; use crate::machinst::isle::WritableReg; @@ -1317,6 +1317,15 @@ impl LoadOP { } } + pub(crate) fn size(&self) -> i64 { + match self { + Self::Lb | Self::Lbu => 1, + Self::Lh | Self::Lhu => 2, + Self::Lw | Self::Lwu | Self::Flw => 4, + Self::Ld | Self::Fld => 8, + } + } + pub(crate) fn op_code(self) -> u32 { match self { Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { @@ -1363,6 +1372,16 @@ impl StoreOP { _ => unreachable!(), } } + + pub(crate) fn size(&self) -> i64 { + match self { + Self::Sb => 1, + Self::Sh => 2, + Self::Sw | Self::Fsw => 4, + Self::Sd | Self::Fsd => 8, + } + } + pub(crate) fn op_code(self) -> u32 { match self { Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, @@ -1983,16 +2002,19 @@ impl CiOp { // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes match self { CiOp::CAddi | CiOp::CSlli => 0b000, - CiOp::CAddiw => 0b001, - CiOp::CAddi16sp => 0b011, + CiOp::CAddiw | CiOp::CFldsp => 0b001, + CiOp::CLi | CiOp::CLwsp => 0b010, + CiOp::CAddi16sp | CiOp::CLui | CiOp::CLdsp => 0b011, } } pub fn op(&self) -> COpcodeSpace { // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap match self { - CiOp::CAddi | CiOp::CAddiw | CiOp::CAddi16sp => COpcodeSpace::C1, - CiOp::CSlli => COpcodeSpace::C2, + CiOp::CAddi | CiOp::CAddiw | CiOp::CAddi16sp | CiOp::CLi | CiOp::CLui => { + COpcodeSpace::C1 + } + CiOp::CSlli | CiOp::CLwsp | CiOp::CLdsp | CiOp::CFldsp => COpcodeSpace::C2, } } } @@ -2012,3 +2034,82 @@ 
impl CiwOp { } } } + +impl CbOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CbOp::CSrli | CbOp::CSrai | CbOp::CAndi => 0b100, + } + } + + pub fn funct2(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CbOp::CSrli => 0b00, + CbOp::CSrai => 0b01, + CbOp::CAndi => 0b10, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CbOp::CSrli | CbOp::CSrai | CbOp::CAndi => COpcodeSpace::C1, + } + } +} + +impl CssOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CssOp::CFsdsp => 0b101, + CssOp::CSwsp => 0b110, + CssOp::CSdsp => 0b111, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CssOp::CSwsp | CssOp::CSdsp | CssOp::CFsdsp => COpcodeSpace::C2, + } + } +} + +impl CsOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CsOp::CFsd => 0b101, + CsOp::CSw => 0b110, + CsOp::CSd => 0b111, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CsOp::CSw | CsOp::CSd | CsOp::CFsd => COpcodeSpace::C0, + } + } +} + +impl ClOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + ClOp::CFld => 0b001, + ClOp::CLw => 0b010, + ClOp::CLd => 0b011, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + ClOp::CLw | ClOp::CLd | ClOp::CFld => COpcodeSpace::C0, + } + } +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs 
index ffc1173d493f..eea635150c97 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -3,7 +3,9 @@ use crate::binemit::StackMap; use crate::ir::{self, LibCall, RelSourceLoc, TrapCode}; use crate::isa::riscv64::inst::*; -use crate::isa::riscv64::lower::isle::generated_code::{CaOp, CiOp, CiwOp, CrOp}; +use crate::isa::riscv64::lower::isle::generated_code::{ + CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, +}; use crate::machinst::{AllocationConsumer, Reg, Writable}; use crate::trace; use cranelift_control::ControlPlane; @@ -432,8 +434,8 @@ impl MachInstEmit for Inst { let mut start_off = sink.cur_offset(); // First try to emit this as a compressed instruction - let success = inst.try_emit_compressed(sink, emit_info, state, &mut start_off); - if !success { + let res = inst.try_emit_compressed(sink, emit_info, state, &mut start_off); + if res.is_none() { // If we can't lets emit it as a normal instruction inst.emit_uncompressed(sink, emit_info, state, &mut start_off); } @@ -462,13 +464,14 @@ impl Inst { emit_info: &EmitInfo, state: &mut EmitState, start_off: &mut u32, - ) -> bool { + ) -> Option<()> { let has_zca = emit_info.isa_flags.has_zca(); + let has_zcd = emit_info.isa_flags.has_zcd(); // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca // to be enabled, so check it early. 
if !has_zca { - return false; + return None; } fn reg_is_compressible(r: Reg) -> bool { @@ -613,6 +616,17 @@ impl Inst { sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm)); } + // c.li + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } if rd.to_reg() != zero_reg() && rs == zero_reg() && imm12.as_i16() != 0 => { + let imm6 = Imm6::maybe_from_imm12(imm12)?; + sink.put2(encode_ci_type(CiOp::CLi, rd, imm6)); + } + // c.addi Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, @@ -620,11 +634,7 @@ impl Inst { rs, imm12, } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => { - let imm6 = match Imm6::maybe_from_imm12(imm12) { - Some(imm6) => imm6, - None => return false, - }; - + let imm6 = Imm6::maybe_from_imm12(imm12)?; sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6)); } @@ -635,13 +645,29 @@ impl Inst { rs, imm12, } if rd.to_reg() == rs && rs != zero_reg() => { - let imm6 = match Imm6::maybe_from_imm12(imm12) { - Some(imm6) => imm6, - None => return false, - }; + let imm6 = Imm6::maybe_from_imm12(imm12)?; sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6)); } + // c.lui + // + // c.lui loads the non-zero 6-bit immediate field into bits 17–12 + // of the destination register, clears the bottom 12 bits, and + // sign-extends bit 17 into all higher bits of the destination. 
+ Inst::Lui { rd, imm: imm20 } + if rd.to_reg() != zero_reg() + && rd.to_reg() != stack_reg() + && imm20.as_i32() != 0 => + { + // Check that the top bits are sign extended + let imm = imm20.as_i32() << 14 >> 14; + if imm != imm20.as_i32() { + return None; + } + let imm6 = Imm6::maybe_from_i32(imm)?; + sink.put2(encode_ci_type(CiOp::CLui, rd, imm6)); + } + // c.slli Inst::AluRRImm12 { alu_op: AluOPRRI::Slli, @@ -654,10 +680,187 @@ impl Inst { let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap(); sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6)); } - _ => return false, + + // c.srli / c.srai + Inst::AluRRImm12 { + alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai), + rd, + rs, + imm12, + } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => { + let op = match op { + AluOPRRI::Srli => CbOp::CSrli, + AluOPRRI::Srai => CbOp::CSrai, + _ => unreachable!(), + }; + + // The shift amount is unsigned, but we encode it as signed. + let shift = imm12.as_i16() & 0x3f; + let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap(); + sink.put2(encode_cb_type(op, rd, imm6)); + } + + // c.andi + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs, + imm12, + } if rd.to_reg() == rs && reg_is_compressible(rs) => { + let imm6 = Imm6::maybe_from_imm12(imm12)?; + sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6)); + } + + // Stack Based Loads + Inst::Load { + rd, + op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld), + from, + flags, + } if from.get_base_register() == Some(stack_reg()) + && (from.get_offset_with_state(state) % op.size()) == 0 => + { + // We encode the offset in multiples of the load size. + let offset = from.get_offset_with_state(state); + let imm6 = u8::try_from(offset / op.size()) + .ok() + .and_then(Uimm6::maybe_from_u8)?; + + // Some additional constraints on these instructions. + // + // Integer loads are not allowed to target x0, but floating point loads + // are, since f0 is not a special register. 
+ // + // Floating point loads are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. + let rd_is_zero = rd.to_reg() == zero_reg(); + let op = match op { + LoadOP::Lw if !rd_is_zero => CiOp::CLwsp, + LoadOP::Ld if !rd_is_zero => CiOp::CLdsp, + LoadOP::Fld if has_zcd => CiOp::CFldsp, + _ => return None, + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put2(encode_ci_sp_load(op, rd, imm6)); + } + + // Regular Loads + Inst::Load { + rd, + op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld), + from, + flags, + } if reg_is_compressible(rd.to_reg()) + && from + .get_base_register() + .map(reg_is_compressible) + .unwrap_or(false) + && (from.get_offset_with_state(state) % op.size()) == 0 => + { + let base = from.get_base_register().unwrap(); + + // We encode the offset in multiples of the load size. + let offset = from.get_offset_with_state(state); + let imm5 = u8::try_from(offset / op.size()) + .ok() + .and_then(Uimm5::maybe_from_u8)?; + + // Floating point loads are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. + let op = match op { + LoadOP::Lw => ClOp::CLw, + LoadOP::Ld => ClOp::CLd, + LoadOP::Fld if has_zcd => ClOp::CFld, + _ => return None, + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts.
+ sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put2(encode_cl_type(op, rd, base, imm5)); + } + + // Stack Based Stores + Inst::Store { + src, + op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd), + to, + flags, + } if to.get_base_register() == Some(stack_reg()) + && (to.get_offset_with_state(state) % op.size()) == 0 => + { + // We encode the offset in multiples of the store size. + let offset = to.get_offset_with_state(state); + let imm6 = u8::try_from(offset / op.size()) + .ok() + .and_then(Uimm6::maybe_from_u8)?; + + // Floating point stores are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. + let op = match op { + StoreOP::Sw => CssOp::CSwsp, + StoreOP::Sd => CssOp::CSdsp, + StoreOP::Fsd if has_zcd => CssOp::CFsdsp, + _ => return None, + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual store instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put2(encode_css_type(op, src, imm6)); + } + + // Regular Stores + Inst::Store { + src, + op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd), + to, + flags, + } if reg_is_compressible(src) + && to + .get_base_register() + .map(reg_is_compressible) + .unwrap_or(false) + && (to.get_offset_with_state(state) % op.size()) == 0 => + { + let base = to.get_base_register().unwrap(); + + // We encode the offset in multiples of the store size. + let offset = to.get_offset_with_state(state); + let imm5 = u8::try_from(offset / op.size()) + .ok() + .and_then(Uimm5::maybe_from_u8)?; + + // Floating point stores are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension.
+ let op = match op { + StoreOP::Sw => CsOp::CSw, + StoreOP::Sd => CsOp::CSd, + StoreOP::Fsd if has_zcd => CsOp::CFsd, + _ => return None, + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual store instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + sink.put2(encode_cs_type(op, src, base, imm5)); + } + + _ => return None, } - return true; + return Some(()); } fn emit_uncompressed( diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 2a06b1093a5f..e3054bafb927 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -9,8 +9,9 @@ use super::*; use crate::isa::riscv64::inst::reg_to_gpr_num; use crate::isa::riscv64::lower::isle::generated_code::{ - CaOp, CiOp, CiwOp, CjOp, CrOp, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, - VecAluOpRRRImm5, VecAluOpRRRR, VecElementWidth, VecOpCategory, VecOpMasking, + COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, CjOp, ClOp, CrOp, CsOp, CssOp, VecAluOpRImm5, + VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAluOpRRRR, VecElementWidth, + VecOpCategory, VecOpMasking, }; use crate::machinst::isle::WritableReg; use crate::Reg; @@ -407,6 +408,41 @@ pub fn encode_ci_type(op: CiOp, rd: WritableReg, imm: Imm6) -> u16 { bits.try_into().unwrap() } +// Stack-Pointer relative loads are regular CI instructions, but, the immediate +// is zero extended, and with a slightly different immediate field encoding. +pub fn encode_ci_sp_load(op: CiOp, rd: WritableReg, imm: Uimm6) -> u16 { + let imm = imm.bits(); + + // These are the spec encoded offsets. + // LWSP: [5|4:2|7:6] + // LDSP: [5|4:3|8:6] + // FLDSP: [5|4:3|8:6] + // + // We don't receive the entire offset in `imm`, just a multiple of the load-size. + + // Number of bits in the lowest position of imm. 3 for lwsp, 2 for {f,}ldsp.
+ let low_bits = match op { + CiOp::CLwsp => 3, // [4:2] + CiOp::CLdsp | CiOp::CFldsp => 2, // [4:3] + _ => unreachable!(), + }; + let high_bits = 6 - 1 - low_bits; + let mut enc_imm = 0; + + // Encode [7:6] at the bottom of imm + enc_imm |= imm >> (6 - high_bits); + + // Next place [4:2] in the middle + enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits; + + // Finally place [5] at the top + enc_imm |= ((imm >> low_bits) & 1) << 5; + + let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap(); + + encode_ci_type(op, rd, enc_imm) +} + /// c.addi16sp is a regular CI op, but the immediate field is encoded in a weird way pub fn encode_c_addi16sp(imm: Imm6) -> u16 { let imm = imm.bits(); @@ -442,3 +478,129 @@ pub fn encode_ciw_type(op: CiwOp, rd: WritableReg, imm: u8) -> u16 { bits |= unsigned_field_width(op.funct3(), 3) << 13; bits.try_into().unwrap() } + +// Encode a CB type instruction. +// +// The imm field is a 6 bit signed immediate. +// +// 0--1-2-------6-7-------9-10-------11-12-------13--------15 +// |op | imm[4:0] | dst | funct2 | imm[5] | funct3 | +pub fn encode_cb_type(op: CbOp, rd: WritableReg, imm: Imm6) -> u16 { + let imm = imm.bits(); + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2; + bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(op.funct2(), 2) << 10; + bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CSS type instruction. +// +// The imm field is a 6 bit unsigned immediate. +// +// 0--1-2-------6-7--------12-13-------15 +// |op | src | imm | funct3 | +pub fn encode_css_type(op: CssOp, src: Reg, imm: Uimm6) -> u16 { + let imm = imm.bits(); + + // These are the spec encoded offsets. 
+ // c.swsp: [5:2|7:6] + // c.sdsp: [5:3|8:6] + // c.fsdsp: [5:3|8:6] + // + // We don't receive the entire offset in `imm`, just a multiple of the store-size. + + // Number of bits in the lowest position of imm. 4 for c.swsp, 3 for c.{f,}sdsp. + let low_bits = match op { + CssOp::CSwsp => 4, // [5:2] + CssOp::CSdsp | CssOp::CFsdsp => 3, // [5:3] + }; + let high_bits = 6 - low_bits; + + let mut enc_imm = 0; + enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits; + enc_imm |= imm >> low_bits; + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= reg_to_gpr_num(src) << 2; + bits |= unsigned_field_width(enc_imm as u32, 6) << 7; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CS type instruction. +// +// The imm field is a 5 bit unsigned immediate. +// +// 0--1-2-----4-5----------6-7---------9-10----------12-13-----15 +// |op | src | imm(2-bit) | base | imm(3-bit) | funct3 | +pub fn encode_cs_type(op: CsOp, src: Reg, base: Reg, imm: Uimm5) -> u16 { + let size = match op { + CsOp::CFsd | CsOp::CSd => 8, + CsOp::CSw => 4, + }; + + encode_cs_cl_type_bits(op.op(), op.funct3(), size, src, base, imm) +} + +// Encode a CL type instruction. +// +// The imm field is a 5 bit unsigned immediate. +// +// 0--1-2------4-5----------6-7---------9-10----------12-13-----15 +// |op | dest | imm(2-bit) | base | imm(3-bit) | funct3 | +pub fn encode_cl_type(op: ClOp, dest: WritableReg, base: Reg, imm: Uimm5) -> u16 { + let size = match op { + ClOp::CFld | ClOp::CLd => 8, + ClOp::CLw => 4, + }; + + encode_cs_cl_type_bits(op.op(), op.funct3(), size, dest.to_reg(), base, imm) +} + +// CL and CS type instructions have the same physical layout.
+// +// 0--1-2----------4-5----------6-7---------9-10----------12-13-----15 +// |op | dest/src | imm(2-bit) | base | imm(3-bit) | funct3 | +fn encode_cs_cl_type_bits( + op: COpcodeSpace, + funct3: u32, + size: u32, + dest_src: Reg, + base: Reg, + imm: Uimm5, +) -> u16 { + let imm = imm.bits(); + + // c.sw / c.lw: [2|6] + // c.sd / c.ld: [7:6] + // c.fsd / c.fld: [7:6] + // + // We differentiate these based on the operation size + let imm2 = match size { + 4 => ((imm >> 4) & 1) | ((imm & 1) << 1), + 8 => (imm >> 3) & 0b11, + _ => unreachable!(), + }; + + // [5:3] on all opcodes + let imm3 = match size { + 4 => (imm >> 1) & 0b111, + 8 => (imm >> 0) & 0b111, + _ => unreachable!(), + }; + + let mut bits = 0; + bits |= unsigned_field_width(op.bits(), 2); + bits |= reg_to_compressed_gpr_num(dest_src) << 2; + bits |= unsigned_field_width(imm2 as u32, 2) << 5; + bits |= reg_to_compressed_gpr_num(base) << 7; + bits |= unsigned_field_width(imm3 as u32, 3) << 10; + bits |= unsigned_field_width(funct3, 3) << 13; + bits.try_into().unwrap() +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs index 6f4d7075db70..a17edcdde291 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/imms.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs @@ -183,6 +183,14 @@ impl Imm6 { } } + pub fn maybe_from_i32(value: i32) -> Option { + value.try_into().ok().and_then(Imm6::maybe_from_i16) + } + + pub fn maybe_from_i64(value: i64) -> Option { + value.try_into().ok().and_then(Imm6::maybe_from_i16) + } + pub fn maybe_from_imm12(value: Imm12) -> Option { Imm6::maybe_from_i16(value.as_i16()) } @@ -199,6 +207,62 @@ impl Display for Imm6 { } } +/// An unsigned 6-bit immediate.
+#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Uimm6 { + value: u8, +} + +impl Uimm6 { + /// Create an unsigned 6-bit immediate from a u8 + pub fn maybe_from_u8(value: u8) -> Option { + if value <= 63 { + Some(Self { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value & 0x3f + } +} + +impl Display for Uimm6 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// An unsigned 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Uimm5 { + value: u8, +} + +impl Uimm5 { + /// Create an unsigned 5-bit immediate from a u8 + pub fn maybe_from_u8(value: u8) -> Option { + if value <= 31 { + Some(Self { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value & 0x1f + } +} + +impl Display for Uimm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + impl Inst { pub(crate) fn imm_min() -> i64 { let imm20_max: i64 = (1 << 19) << 12; diff --git a/cranelift/filetests/filetests/isa/riscv64/zca.clif b/cranelift/filetests/filetests/isa/riscv64/zca.clif index 84ee5c541fb3..cc8a16140c49 100644 --- a/cranelift/filetests/filetests/isa/riscv64/zca.clif +++ b/cranelift/filetests/filetests/isa/riscv64/zca.clif @@ -189,10 +189,10 @@ block0(v0: i8): ; Disassembled: ; block0: ; offset 0x0 ; c.addi16sp sp, -0x10 -; sd ra, 8(sp) -; sd s0, 0(sp) +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) ; c.mv s0, sp -; block1: ; offset 0xc +; block1: ; offset 0x8 ; auipc a2, 0 ; ld a2, 0xa(a2) ; c.j 0xa @@ -242,13 +242,13 @@ block0(v0: i64, v1: i64): ; Disassembled: ; block0: ; offset 0x0 ; c.addi16sp sp, -0x10 -; sd ra, 8(sp) -; sd s0, 0(sp) +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) ; c.mv s0, sp -; block1: ; offset 0xc +; block1: ; offset 0x8 ; c.jalr a1 -; ld ra, 8(sp) -; ld s0, 0(sp) +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) ; c.addi16sp sp, 0x10 ; c.jr ra @@ -372,15 +372,15 @@ block0: ; Disassembled: ; block0:
; offset 0x0 ; c.addi16sp sp, -0x10 -; sd ra, 8(sp) -; sd s0, 0(sp) +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) ; c.mv s0, sp ; c.addi16sp sp, -0x10 -; block1: ; offset 0xe +; block1: ; offset 0xa ; c.mv a0, sp ; c.addi16sp sp, 0x10 -; ld ra, 8(sp) -; ld s0, 0(sp) +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) ; c.addi16sp sp, 0x10 ; c.jr ra @@ -400,6 +400,37 @@ block0(v0: i64): ; c.slli a0, 0x3f ; c.jr ra +function %c_srai(i64) -> i64 { +block0(v0: i64): + v1 = sshr_imm.i64 v0, 63 + return v1 +} + +; VCode: +; block0: +; srai a0,a0,63 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.srai a0, 0x3f +; c.jr ra + +function %c_srli(i64) -> i64 { +block0(v0: i64): + v1 = ushr_imm.i64 v0, 20 + return v1 +} + +; VCode: +; block0: +; srli a0,a0,20 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.srli a0, 0x14 +; c.jr ra function %c_addi4spn() -> i64 { ss0 = explicit_slot 64 @@ -426,15 +457,325 @@ block0: ; Disassembled: ; block0: ; offset 0x0 ; c.addi16sp sp, -0x10 -; sd ra, 8(sp) -; sd s0, 0(sp) +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) ; c.mv s0, sp ; c.addi16sp sp, -0x40 -; block1: ; offset 0xe +; block1: ; offset 0xa ; c.addi4spn a0, sp, 0x18 ; c.addi16sp sp, 0x40 -; ld ra, 8(sp) -; ld s0, 0(sp) +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + +function %c_li() -> i64 { +block0: + v0 = iconst.i64 1 + return v0 +} + +; VCode: +; block0: +; li a0,1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.li a0, 1 +; c.jr ra + + +function %c_lui() -> i64, i64, i64 { +block0: + v0 = iconst.i64 0x4000 + v1 = iconst.i64 0xffffffff_fffff000 + v2 = iconst.i64 0xffffffff_fffe0000 + return v0, v1, v2 +} + +; VCode: +; block0: +; mv a2,a0 +; lui a0,4 +; lui a1,-1 +; lui a4,-32 +; sd a4,0(a2) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.mv a2, a0 +; c.lui a0, 4 +; c.lui a1, 0xfffff +; c.lui a4, 0xfffe0 +; c.sd a4, 0(a2) +; c.jr ra + +function %c_andi_f(i64) -> i64 { +block0(v0: i64): + v1 = band_imm.i64 v0, 0xf + return v1 +} + +; VCode: +; 
block0: +; andi a0,a0,15 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.andi a0, 0xf +; c.jr ra + +function %c_andi_neg_16(i64) -> i64 { +block0(v0: i64): + v1 = band_imm.i64 v0, -16 + return v1 +} + +; VCode: +; block0: +; andi a0,a0,-16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.andi a0, -0x10 +; c.jr ra + +function %c_andi_zero(i64) -> i64 { +block0(v0: i64): + v1 = band_imm.i64 v0, 0 + return v1 +} + +; VCode: +; block0: +; andi a0,a0,0 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.andi a0, 0 +; c.jr ra + +function %c_lwsp() -> i32 { + ss0 = explicit_slot 16 + +block0: + v0 = stack_load.i32 ss0+12 + return v0 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; lw a0,12(nominal_sp) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.addi16sp sp, -0x10 +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) +; c.mv s0, sp +; c.addi16sp sp, -0x10 +; block1: ; offset 0xa +; c.lwsp a0, 0xc(sp) ; c.addi16sp sp, 0x10 +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + +function %c_ldsp() -> i64 { + ss0 = explicit_slot 128 + +block0: + v0 = stack_load.i64 ss0+64 + return v0 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-128 +; block0: +; ld a0,64(nominal_sp) +; add sp,+128 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.addi16sp sp, -0x10 +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) +; c.mv s0, sp +; c.addi16sp sp, -0x80 +; block1: ; offset 0xa +; c.ldsp a0, 0x40(sp) +; c.addi16sp sp, 0x80 +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + +function %c_swsp(i32) { + ss0 = explicit_slot 16 + +block0(v0: i32): + stack_store.i32 v0, ss0+12 + return +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; sw a0,12(nominal_sp) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add 
sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.addi16sp sp, -0x10 +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) +; c.mv s0, sp +; c.addi16sp sp, -0x10 +; block1: ; offset 0xa +; c.swsp a0, 0xc(sp) +; c.addi16sp sp, 0x10 +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + +function %c_sdsp(i64) { + ss0 = explicit_slot 128 + +block0(v0: i64): + stack_store.i64 v0, ss0+64 + return +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-128 +; block0: +; sd a0,64(nominal_sp) +; add sp,+128 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.addi16sp sp, -0x10 +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) +; c.mv s0, sp +; c.addi16sp sp, -0x80 +; block1: ; offset 0xa +; c.sdsp a0, 0x40(sp) +; c.addi16sp sp, 0x80 +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + + +function %c_sw(i64, i32) { +block0(v0: i64, v1: i32): + store.i32 v1, v0+12 + store.i32 v1, v0-12 + return +} + +; VCode: +; block0: +; sw a1,12(a0) +; sw a1,-12(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.sw a1, 0xc(a0) +; sw a1, -0xc(a0) +; c.jr ra + +function %c_sd(i64, i64) { +block0(v0: i64, v1: i64): + store.i32 v1, v0+16 + store.i32 v1, v0-16 + return +} + +; VCode: +; block0: +; sd a1,16(a0) +; sd a1,-16(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.sd a1, 0x10(a0) +; sd a1, -0x10(a0) +; c.jr ra + +function %c_lw(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0+64 + return v1 +} + +; VCode: +; block0: +; lw a0,64(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.lw a0, 0x40(a0) +; c.jr ra + +function %c_ld(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0+64 + return v1 +} + +; VCode: +; block0: +; ld a0,64(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.ld a0, 0x40(a0) ; c.jr ra diff --git a/cranelift/filetests/filetests/isa/riscv64/zcd.clif b/cranelift/filetests/filetests/isa/riscv64/zcd.clif new file mode 100644 index 
000000000000..f67537775f74 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/zcd.clif @@ -0,0 +1,114 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_zca has_zcd + +function %c_fldsp() -> f64 { + ss0 = explicit_slot 16 + +block0: + v0 = stack_load.f64 ss0+8 + return v0 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; fld fa0,8(nominal_sp) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.addi16sp sp, -0x10 +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) +; c.mv s0, sp +; c.addi16sp sp, -0x10 +; block1: ; offset 0xa +; c.fldsp fa0, 8(sp) +; c.addi16sp sp, 0x10 +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + +function %c_fsdsp(f64) { + ss0 = explicit_slot 128 + +block0(v0: f64): + stack_store.f64 v0, ss0+64 + return +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-128 +; block0: +; fsd fa0,64(nominal_sp) +; add sp,+128 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.addi16sp sp, -0x10 +; c.sdsp ra, 8(sp) +; c.sdsp s0, 0(sp) +; c.mv s0, sp +; c.addi16sp sp, -0x80 +; block1: ; offset 0xa +; c.fsdsp fa0, 0x40(sp) +; c.addi16sp sp, 0x80 +; c.ldsp ra, 8(sp) +; c.ldsp s0, 0(sp) +; c.addi16sp sp, 0x10 +; c.jr ra + + +function %c_fsd(i64, f64) { +block0(v0: i64, v1: f64): + store.i32 v1, v0+16 + store.i32 v1, v0-16 + return +} + +; VCode: +; block0: +; fsd fa0,16(a0) +; fsd fa0,-16(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.fsd fa0, 0x10(a0) +; fsd fa0, -0x10(a0) +; c.jr ra + +function %c_fld(i64) -> f64 { +block0(v0: i64): + v1 = load.f64 v0+64 + return v1 +} + +; VCode: +; block0: +; fld fa0,64(a0) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; c.fld fa0, 0x40(a0) +; c.jr ra +