diff --git a/CHANGELOG.md b/CHANGELOG.md index a3a539c26..0981f6b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Ticket | |:----:|:-------:|:--------|:------:| +| 13.09.2024 | 1.10.3.5 | rtl code cleanups; minor CPU control optimizations | [#1014](https://github.com/stnolting/neorv32/pull/1014) | | 08.09.2024 | 1.10.3.4 | minor rtl/CSR optimizations | [#1010](https://github.com/stnolting/neorv32/pull/1010) | | 08.09.2024 | 1.10.3.3 | optimize CSR address logic (to reduce switching activity) | [#1008](https://github.com/stnolting/neorv32/pull/1008) | | 05.09.2024 | 1.10.3.2 | :test_tube: Remove "for loop" construct from memory initialization function as the max. number of loop/unrolling iterations might be constrained | [#1005](https://github.com/stnolting/neorv32/pull/1005) | diff --git a/rtl/core/neorv32_cache.vhd b/rtl/core/neorv32_cache.vhd index 6b4ea0744..14f144c88 100644 --- a/rtl/core/neorv32_cache.vhd +++ b/rtl/core/neorv32_cache.vhd @@ -176,7 +176,7 @@ begin -- Check if Direct/Uncached Access -------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - dir_acc_d <= '1' when (UC_ENABLE = true) and -- direct accesses implemented + dir_acc_d <= '1' when UC_ENABLE and -- direct accesses implemented ((host_req_i.addr(31 downto 28) >= UC_BEGIN) or -- uncached memory page (host_req_i.rvso = '1')) else '0'; -- atomic (reservation set) operation @@ -210,7 +210,7 @@ begin dir_acc_q <= '0'; end if; -- bus request buffer -- - if (READ_ONLY = true) then -- do not propagate STB on write access, issue ERR instead + if READ_ONLY then -- do not propagate STB on write access, issue ERR instead dir_req_q <= dir_req_d; dir_req_q.stb <= dir_req_d.stb and (not dir_req_d.rw); -- read accesses only dir_rsp_q <= dir_rsp_d; @@ -485,7 +485,7 @@ begin bus_sync_o <= '1'; -- trigger bus unit: 
sync operation ctrl.state_nxt <= S_WAIT_SYNC; elsif (req_i.stb = '1') or (ctrl.req_buf = '1') then -- (pending) access request - if (req_i.rw = '1') and (READ_ONLY = true) then -- invalid write access? + if (req_i.rw = '1') and READ_ONLY then -- invalid write access? ctrl.state_nxt <= S_ERROR; else ctrl.state_nxt <= S_CHECK; @@ -497,7 +497,7 @@ begin rsp_o.data <= rdata_i; -- output read data ctrl.req_buf_nxt <= '0'; -- access request completed if (hit_i = '1') then - if (req_i.rw = '1') and (READ_ONLY = false) then -- write access + if (req_i.rw = '1') and (not READ_ONLY) then -- write access dirty_o <= '1'; -- cache block is dirty now we_o <= req_i.ben; -- finalize write access end if; @@ -683,7 +683,7 @@ begin -- Access Status (1 Cycle Latency) -------------------------------------------------------- -- ------------------------------------------------------------------------------------------- hit_o <= '1' when (valid_mem_rd = '1') and (tag_mem_rd = acc_tag_ff) else '0'; -- cache access hit - dirty_o <= '1' when (valid_mem_rd = '1') and (dirty_mem_rd = '1') and (READ_ONLY = false) else '0'; -- accessed block is dirty + dirty_o <= '1' when (valid_mem_rd = '1') and (dirty_mem_rd = '1') and (not READ_ONLY) else '0'; -- accessed block is dirty -- base address of accessed block -- base_o(31 downto 31-(tag_size_c-1)) <= tag_mem_rd; @@ -698,10 +698,10 @@ begin if rising_edge(clk_i) then -- write access -- if (we_i(0) = '1') then - data_mem_b0(to_integer(unsigned(acc_adr))) <= wdata_i(07 downto 00); + data_mem_b0(to_integer(unsigned(acc_adr))) <= wdata_i(7 downto 0); end if; if (we_i(1) = '1') then - data_mem_b1(to_integer(unsigned(acc_adr))) <= wdata_i(15 downto 08); + data_mem_b1(to_integer(unsigned(acc_adr))) <= wdata_i(15 downto 8); end if; if (we_i(2) = '1') then data_mem_b2(to_integer(unsigned(acc_adr))) <= wdata_i(23 downto 16); @@ -882,7 +882,7 @@ begin -- ------------------------------------------------------------ upret_nxt <= S_DOWNLOAD_REQ; -- go straight to 
S_DOWNLOAD_REQ when S_UPLOAD_GET has completed (if executed) addr_nxt.idx <= baddr.idx; -- index of reference cache block - if (dirty_i = '1') and (READ_ONLY = false) then -- block is dirty, upload first + if (dirty_i = '1') and (not READ_ONLY) then -- block is dirty, upload first addr_nxt.tag <= baddr.tag; -- base address (tag + index) of accessed block state_nxt <= S_UPLOAD_GET; else -- block is clean, download new block @@ -915,7 +915,7 @@ begin when S_UPLOAD_GET => -- upload dirty cache block: read word from cache -- ------------------------------------------------------------ - if (READ_ONLY = true) then + if READ_ONLY then state_nxt <= S_IDLE; else bus_req_o.rw <= '1'; -- write access @@ -924,7 +924,7 @@ begin when S_UPLOAD_REQ => -- upload dirty cache block: request bus write -- ------------------------------------------------------------ - if (READ_ONLY = true) then + if READ_ONLY then state_nxt <= S_IDLE; else bus_req_o.rw <= '1'; -- write access @@ -934,7 +934,7 @@ begin when S_UPLOAD_RSP => -- upload dirty cache block: wait for bus response -- ------------------------------------------------------------ - if (READ_ONLY = true) then + if READ_ONLY then state_nxt <= S_IDLE; else bus_req_o.rw <= '1'; -- write access @@ -964,7 +964,7 @@ begin -- ------------------------------------------------------------ addr_nxt.tag <= baddr.tag; -- tag of currently index block inval_o <= '1'; -- invalidate currently index block - if (dirty_i = '1') and (READ_ONLY = false) then -- block dirty? + if (dirty_i = '1') and (not READ_ONLY) then -- block dirty? 
state_nxt <= S_UPLOAD_GET; else -- move on to next block addr_nxt.idx <= std_ulogic_vector(unsigned(addr.idx) + 1); diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index ee7d05c97..bacccf851 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -32,16 +32,16 @@ entity neorv32_cpu_control is HART_ID : std_ulogic_vector(31 downto 0); -- hardware thread ID VENDOR_ID : std_ulogic_vector(31 downto 0); -- vendor's JEDEC ID CPU_BOOT_ADDR : std_ulogic_vector(31 downto 0); -- cpu boot address - CPU_DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode parking loop entry address, 4-byte aligned - CPU_DEBUG_EXC_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode exception entry address, 4-byte aligned + CPU_DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug-mode parking loop entry address, 4-byte aligned + CPU_DEBUG_EXC_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug-mode exception entry address, 4-byte aligned -- RISC-V CPU Extensions -- CPU_EXTENSION_RISCV_A : boolean; -- implement atomic memory operations extension? CPU_EXTENSION_RISCV_B : boolean; -- implement bit-manipulation extension? CPU_EXTENSION_RISCV_C : boolean; -- implement compressed extension? - CPU_EXTENSION_RISCV_E : boolean; -- implement embedded RF extension? + CPU_EXTENSION_RISCV_E : boolean; -- implement embedded-class register file extension? CPU_EXTENSION_RISCV_M : boolean; -- implement mul/div extension? CPU_EXTENSION_RISCV_U : boolean; -- implement user mode extension? - CPU_EXTENSION_RISCV_Zfinx : boolean; -- implement 32-bit floating-point extension (using INT regs) + CPU_EXTENSION_RISCV_Zfinx : boolean; -- implement 32-bit floating-point extension (using INT regs)? CPU_EXTENSION_RISCV_Zicntr : boolean; -- implement base counters? CPU_EXTENSION_RISCV_Zicond : boolean; -- implement integer conditional operations? CPU_EXTENSION_RISCV_Zihpm : boolean; -- implement hardware performance monitors? 
@@ -146,12 +146,10 @@ architecture neorv32_cpu_control_rtl of neorv32_cpu_control is is_m_mul, is_m_div : std_ulogic; is_b_imm, is_b_reg : std_ulogic; is_zicond : std_ulogic; - rs1_zero, rd_zero : std_ulogic; end record; signal decode_aux : decode_aux_t; -- instruction execution engine -- - -- make sure reset state is the first item in the list (discussion #415) type execute_engine_state_t is (DISPATCH, TRAP_ENTER, TRAP_EXIT, RESTART, SLEEP, EXECUTE, ALU_WAIT, BRANCH, BRANCHED, SYSTEM, MEM_REQ, MEM_WAIT); type execute_engine_t is record @@ -511,12 +509,14 @@ begin imm_o <= (others => '0'); elsif rising_edge(clk_i) then if (execute_engine.state = DISPATCH) then -- prepare update of next_pc (using ALU's PC + IMM in EXECUTE state) + imm_o <= (others => '0'); if CPU_EXTENSION_RISCV_C and (issue_engine.data(33) = '1') then -- is de-compressed C instruction? - imm_o <= x"00000002"; + imm_o(3 downto 0) <= x"2"; else - imm_o <= x"00000004"; + imm_o(3 downto 0) <= x"4"; end if; else + imm_o <= replicate_f(execute_engine.ir(31), 21) & execute_engine.ir(30 downto 21) & execute_engine.ir(20); -- default: I-immediate case decode_aux.opcode is when opcode_store_c => -- S-immediate imm_o <= replicate_f(execute_engine.ir(31), 21) & execute_engine.ir(30 downto 25) & execute_engine.ir(11 downto 7); @@ -527,9 +527,9 @@ begin when opcode_jal_c => -- J-immediate imm_o <= replicate_f(execute_engine.ir(31), 12) & execute_engine.ir(19 downto 12) & execute_engine.ir(20) & execute_engine.ir(30 downto 21) & '0'; when opcode_amo_c => -- atomic memory access - imm_o <= (others => '0'); - when others => -- I-immediate - imm_o <= replicate_f(execute_engine.ir(31), 21) & execute_engine.ir(30 downto 21) & execute_engine.ir(20); + if CPU_EXTENSION_RISCV_A then imm_o <= (others => '0'); end if; + when others => + NULL; -- use default end case; end if; end if; @@ -608,12 +608,12 @@ begin execute_engine.next_pc <= csr.mepc(XLEN-1 downto 1) & '0'; end if; - when BRANCH => -- control flow transfer + 
when BRANCH => -- branch instruction if (trap_ctrl.exc_buf(exc_illegal_c) = '0') and (execute_engine.branch_taken = '1') then -- valid taken branch execute_engine.next_pc <= alu_add_i(XLEN-1 downto 1) & '0'; end if; - when EXECUTE => -- linear increment (use ALU's adder to compute next_pc = current_pc + imm) + when EXECUTE => -- linear increment (use ALU's adder to compute next_pc = current_pc + imm (2/4)) execute_engine.next_pc <= alu_add_i(XLEN-1 downto 1) & '0'; when others => -- no update @@ -630,13 +630,21 @@ begin -- PC output -- curr_pc_o <= execute_engine.pc(XLEN-1 downto 1) & '0'; -- current PC - link_pc_o <= (execute_engine.link_pc(XLEN-1 downto 1) & '0'); -- jump-and-link return address + link_pc_o <= execute_engine.link_pc(XLEN-1 downto 1) & '0'; -- jump-and-link return address -- Decoding Helper Logic ------------------------------------------------------------------ -- ------------------------------------------------------------------------------------------- decode_helper: process(execute_engine) + variable f7_v : std_ulogic_vector(6 downto 0); + variable f5_v : std_ulogic_vector(4 downto 0); + variable f3_v : std_ulogic_vector(2 downto 0); begin + -- shortcuts -- + f7_v := execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c); -- funct7 + f5_v := execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c); -- funct5 + f3_v := execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c); -- funct3 + -- defaults -- decode_aux.is_b_imm <= '0'; decode_aux.is_b_reg <= '0'; @@ -647,72 +655,54 @@ begin -- BITMANIP instruction -- if CPU_EXTENSION_RISCV_B then -- implemented at all? 
-- register-immediate operation -- - if ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0110000") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "001") and ( - (execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c) = "00000") or -- CLZ - (execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c) = "00001") or -- CTZ - (execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c) = "00010") or -- CPOP - (execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c) = "00100") or -- SEXT.B - (execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c) = "00101") -- SEXT.H - )) or - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0110000") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "101")) or -- RORI - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0010100") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "101") and - (execute_engine.ir(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c) = "00111")) or -- ORCB - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0100100") and (execute_engine.ir(instr_funct3_msb_c-1 downto instr_funct3_lsb_c) = "01")) or -- BCLRI / BEXTI - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0110100") and (execute_engine.ir(instr_funct3_msb_c-1 downto instr_funct3_lsb_c) = "01")) or -- REV8 / BINVI - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0010100") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "001")) then -- BSETI + if ((f7_v = "0110000") and (f3_v = "001") and ((f5_v = "00000") or (f5_v = "00001") or (f5_v = "00010") or (f5_v = "00100") or (f5_v = "00101"))) or -- f5: CLZ (00000), CTZ (00001), CPOP (00010), SEXT.B (00100), SEXT.H (00101) + ((f7_v = "0110000") and (f3_v = "101")) or -- RORI + ((f7_v = "0010100") and (f3_v = "101") and (f5_v = "00111")) or -- ORCB + ((f7_v = "0100100") and (f3_v(1 downto 0) = "01")) 
or -- BCLRI / BEXTI + ((f7_v = "0110100") and (f3_v(1 downto 0) = "01")) or -- REV8 / BINVI + ((f7_v = "0010100") and (f3_v = "001")) then -- BSETI decode_aux.is_b_imm <= '1'; end if; -- register-register operation -- - if ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0110000") and (execute_engine.ir(instr_funct3_msb_c-1 downto instr_funct3_lsb_c) = "01")) or -- ROR / ROL - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0000101") and (execute_engine.ir(instr_funct3_msb_c) = '1')) or -- MIN[U] / MAX[U] - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0000100") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "100")) or -- ZEXTH - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0100100") and (execute_engine.ir(instr_funct3_msb_c-1 downto instr_funct3_lsb_c) = "01")) or -- BCLR / BEXT - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0110100") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "001")) or -- BINV - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0010100") and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "001")) or -- BSET - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0100000") and ( - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "111") or -- ANDN - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "110") or -- ORN - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "100") -- XORN - )) or - ((execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0010000") and ( - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "010") or -- SH1ADD - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "100") or -- SH2ADD - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "110") -- SH3ADD - ) - ) then + if ((f7_v = "0110000") and (f3_v(1 
downto 0) = "01")) or -- ROR / ROL + ((f7_v = "0000101") and (f3_v(2) = '1')) or -- MIN[U] / MAX[U] + ((f7_v = "0000100") and (f3_v = "100")) or -- ZEXTH + ((f7_v = "0100100") and (f3_v(1 downto 0) = "01")) or -- BCLR / BEXT + (((f7_v = "0110100") or (f7_v = "0010100")) and (f3_v = "001")) or -- BINV / BSET + ((f7_v = "0100000") and ((f3_v = "111") or (f3_v = "110") or (f3_v = "100"))) or -- ANDN, ORN, XORN + ((f7_v = "0010000") and ((f3_v = "010") or (f3_v = "100") or (f3_v = "110"))) then -- SH1ADD, SH2ADD, SH3ADD decode_aux.is_b_reg <= '1'; end if; end if; -- integer MUL (M/Zmmul) / DIV (M) instruction -- - if (execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0000001") then - if (CPU_EXTENSION_RISCV_M or CPU_EXTENSION_RISCV_Zmmul) and (execute_engine.ir(instr_funct3_msb_c) = '0') then + if (f7_v = "0000001") then + if (CPU_EXTENSION_RISCV_M or CPU_EXTENSION_RISCV_Zmmul) and (f3_v(2) = '0') then decode_aux.is_m_mul <= '1'; end if; - if CPU_EXTENSION_RISCV_M and (execute_engine.ir(instr_funct3_msb_c) = '1') then + if CPU_EXTENSION_RISCV_M and (f3_v(2) = '1') then decode_aux.is_m_div <= '1'; end if; end if; -- CONDITIONAL instruction (Zicond) -- - if CPU_EXTENSION_RISCV_Zicond and (execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0000111") and - (execute_engine.ir(instr_funct3_msb_c) = '1') and (execute_engine.ir(instr_funct3_lsb_c) = '1') then + if CPU_EXTENSION_RISCV_Zicond and (f7_v = "0000111") and (f3_v(2) = '1') and (f3_v(0) = '1') then decode_aux.is_zicond <= '1'; end if; - end process decode_helper; - -- register/uimm5 checks -- - decode_aux.rs1_zero <= '1' when (execute_engine.ir(instr_rs1_msb_c downto instr_rs1_lsb_c) = "00000") else '0'; - decode_aux.rd_zero <= '1' when (execute_engine.ir(instr_rd_msb_c downto instr_rd_lsb_c ) = "00000") else '0'; - - -- simplified rv32 opcode -- - decode_aux.opcode <= execute_engine.ir(instr_opcode_msb_c downto instr_opcode_lsb_c+2) & "11"; + -- simplified rv32 opcode -- + 
decode_aux.opcode <= execute_engine.ir(instr_opcode_msb_c downto instr_opcode_lsb_c+2) & "11"; + end process decode_helper; -- Execute Engine FSM Comb ---------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- execute_engine_fsm_comb: process(execute_engine, debug_ctrl, trap_ctrl, hw_trigger_match, decode_aux, issue_engine, csr, alu_cp_done_i, lsu_wait_i) + variable funct3_v : std_ulogic_vector(2 downto 0); begin + -- shortcuts -- + funct3_v := execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c); + -- arbiter defaults -- execute_engine.state_nxt <= execute_engine.state; execute_engine.ir_nxt <= execute_engine.ir; @@ -728,13 +718,13 @@ begin trap_ctrl.hwtrig <= '0'; csr.we_nxt <= '0'; csr.re_nxt <= '0'; - ctrl_nxt <= ctrl_bus_zero_c; -- all zero/off by default (default ALU operation = ZERO, adder.out = ADD) + ctrl_nxt <= ctrl_bus_zero_c; -- all zero/off by default (default ALU operation = ZERO, ALU.adder_out = ADD) -- ALU sign control -- - if (execute_engine.ir(instr_opcode_lsb_c+4) = '1') then -- ALU ops - ctrl_nxt.alu_unsigned <= execute_engine.ir(instr_funct3_lsb_c+0); -- unsigned ALU operation? (SLTIU, SLTU) + if (decode_aux.opcode(4) = '1') then -- ALU ops + ctrl_nxt.alu_unsigned <= funct3_v(0); -- unsigned ALU operation? (SLTIU, SLTU) else -- branches - ctrl_nxt.alu_unsigned <= execute_engine.ir(instr_funct3_lsb_c+1); -- unsigned branches? (BLTU, BGEU) + ctrl_nxt.alu_unsigned <= funct3_v(1); -- unsigned branches? (BLTU, BGEU) end if; -- ALU operand A: is PC? 
-- @@ -754,7 +744,7 @@ begin end case; -- memory read/write access -- - if CPU_EXTENSION_RISCV_A and (decode_aux.opcode(2) = opcode_amo_c(2)) then -- lr/sc + if CPU_EXTENSION_RISCV_A and (decode_aux.opcode(2) = opcode_amo_c(2)) then -- atomic lr/sc ctrl_nxt.lsu_rw <= execute_engine.ir(instr_funct7_lsb_c+2); else -- normal load/store ctrl_nxt.lsu_rw <= execute_engine.ir(5); @@ -770,7 +760,7 @@ begin -- if (trap_ctrl.env_pending = '1') or (trap_ctrl.exc_fire = '1') then -- pending trap or pending exception (fast) execute_engine.state_nxt <= TRAP_ENTER; - elsif (hw_trigger_match = '1') and CPU_EXTENSION_RISCV_Sdtrig then -- hardware breakpoint + elsif CPU_EXTENSION_RISCV_Sdtrig and (hw_trigger_match = '1') then -- hardware breakpoint execute_engine.pc_we <= '1'; -- pc <= next_pc; intercept BEFORE executing the instruction trap_ctrl.hwtrig <= '1'; execute_engine.state_nxt <= DISPATCH; -- stay here another round until trap_ctrl.hwtrig arrives in trap_ctrl.env_pending @@ -790,7 +780,7 @@ begin execute_engine.state_nxt <= RESTART; end if; - when TRAP_EXIT => -- return from trap environment and jump to xEPC + when TRAP_EXIT => -- return from trap environment and jump to trap PC -- ------------------------------------------------------------ trap_ctrl.env_exit <= '1'; execute_engine.state_nxt <= RESTART; @@ -810,7 +800,7 @@ begin when opcode_alu_c | opcode_alui_c => -- ALU core operation -- - case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is + case funct3_v is when funct3_subadd_c => ctrl_nxt.alu_op <= alu_op_add_c; -- ADD(I), SUB when funct3_slt_c | funct3_sltu_c => ctrl_nxt.alu_op <= alu_op_slt_c; -- SLT(I), SLTU(I) when funct3_xor_c => ctrl_nxt.alu_op <= alu_op_xor_c; -- XOR(I) @@ -820,52 +810,53 @@ begin end case; -- addition/subtraction control -- - if (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c+1) = funct3_slt_c(2 downto 1)) or -- SLT(I), SLTU(I) - ((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = 
funct3_subadd_c) and - (execute_engine.ir(instr_opcode_msb_c-1) = '1') and (execute_engine.ir(instr_funct7_msb_c-1) = '1')) then -- SUB + if (funct3_v(2 downto 1) = funct3_slt_c(2 downto 1)) or -- SLT(I), SLTU(I) + ((funct3_v = funct3_subadd_c) and (decode_aux.opcode(5) = '1') and (execute_engine.ir(instr_funct7_msb_c-1) = '1')) then -- SUB ctrl_nxt.alu_sub <= '1'; end if; -- EXT: co-processor MULDIV operation (multi-cycle) -- - if (CPU_EXTENSION_RISCV_M and (execute_engine.ir(instr_opcode_lsb_c+5) = opcode_alu_c(5)) and ((decode_aux.is_m_mul = '1') or (decode_aux.is_m_div = '1'))) or -- MUL/DIV - (CPU_EXTENSION_RISCV_Zmmul and (execute_engine.ir(instr_opcode_lsb_c+5) = opcode_alu_c(5)) and (decode_aux.is_m_mul = '1')) then -- MUL + if (CPU_EXTENSION_RISCV_M and (decode_aux.opcode(5) = opcode_alu_c(5)) and ((decode_aux.is_m_mul = '1') or (decode_aux.is_m_div = '1'))) or -- MUL/DIV + (CPU_EXTENSION_RISCV_Zmmul and (decode_aux.opcode(5) = opcode_alu_c(5)) and (decode_aux.is_m_mul = '1')) then -- MUL ctrl_nxt.alu_cp_trig(cp_sel_muldiv_c) <= '1'; -- trigger MULDIV co-processor execute_engine.state_nxt <= ALU_WAIT; -- EXT: co-processor BIT-MANIPULATION operation (multi-cycle) -- elsif CPU_EXTENSION_RISCV_B and - (((execute_engine.ir(instr_opcode_lsb_c+5) = opcode_alu_c(5)) and (decode_aux.is_b_reg = '1')) or -- register operation - ((execute_engine.ir(instr_opcode_lsb_c+5) = opcode_alui_c(5)) and (decode_aux.is_b_imm = '1'))) then -- immediate operation + (((decode_aux.opcode(5) = opcode_alu_c(5)) and (decode_aux.is_b_reg = '1')) or -- register operation + ((decode_aux.opcode(5) = opcode_alui_c(5)) and (decode_aux.is_b_imm = '1'))) then -- immediate operation ctrl_nxt.alu_cp_trig(cp_sel_bitmanip_c) <= '1'; -- trigger BITMANIP co-processor execute_engine.state_nxt <= ALU_WAIT; -- EXT: co-processor CONDITIONAL operation (multi-cycle) -- - elsif CPU_EXTENSION_RISCV_Zicond and (decode_aux.is_zicond = '1') and (execute_engine.ir(instr_opcode_lsb_c+5) = opcode_alu_c(5)) then 
+ elsif CPU_EXTENSION_RISCV_Zicond and (decode_aux.is_zicond = '1') and (decode_aux.opcode(5) = opcode_alu_c(5)) then ctrl_nxt.alu_cp_trig(cp_sel_cond_c) <= '1'; -- trigger COND co-processor execute_engine.state_nxt <= ALU_WAIT; -- BASE: co-processor SHIFT operation (multi-cycle) -- - elsif (execute_engine.ir(instr_funct3_msb_c-1 downto instr_funct3_lsb_c) = "01") then -- sll/sr + elsif (funct3_v(1 downto 0) = "01") then -- sll/sr ctrl_nxt.alu_cp_trig(cp_sel_shifter_c) <= '1'; -- trigger SHIFTER co-processor execute_engine.state_nxt <= ALU_WAIT; -- BASE: ALU CORE operation (single-cycle) -- else - ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back + ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back (won't happen if exception) execute_engine.state_nxt <= DISPATCH; end if; - -- load upper immediate / add upper immediate to PC -- - when opcode_lui_c | opcode_auipc_c => - if (execute_engine.ir(instr_opcode_lsb_c+5) = opcode_lui_c(5)) then -- LUI - ctrl_nxt.alu_op <= alu_op_movb_c; -- pass immediate - else -- AUIPC - ctrl_nxt.alu_op <= alu_op_add_c; -- add PC and immediate - end if; - ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back + -- load upper immediate -- + when opcode_lui_c => + ctrl_nxt.alu_op <= alu_op_movb_c; -- pass immediate + ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back (won't happen if exception) + execute_engine.state_nxt <= DISPATCH; + + -- add upper immediate to PC -- + when opcode_auipc_c => + ctrl_nxt.alu_op <= alu_op_add_c; -- add PC and immediate + ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back (won't happen if exception) execute_engine.state_nxt <= DISPATCH; -- memory access -- when opcode_load_c | opcode_store_c | opcode_amo_c => execute_engine.state_nxt <= MEM_REQ; - -- branch / jump and link / with register -- + -- branch / jump-and-link (with register) -- when opcode_branch_c | opcode_jal_c | opcode_jalr_c => execute_engine.state_nxt <= BRANCH; @@ -895,13 +886,13 @@ begin -- ------------------------------------------------------------ 
ctrl_nxt.alu_op <= alu_op_cp_c; if (alu_cp_done_i = '1') or (trap_ctrl.exc_buf(exc_illegal_c) = '1') then - ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back (won't happen in case of an illegal instruction) + ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back (won't happen if exception) execute_engine.state_nxt <= DISPATCH; end if; when BRANCH => -- update next_pc on taken branches and jumps -- ------------------------------------------------------------ - ctrl_nxt.rf_wb_en <= execute_engine.ir(instr_opcode_lsb_c+2); -- save return address if link operation (will not happen if misaligned) + ctrl_nxt.rf_wb_en <= decode_aux.opcode(2); -- save return address if link operation (won't happen if exception) if (trap_ctrl.exc_buf(exc_illegal_c) = '0') and (execute_engine.branch_taken = '1') then -- valid taken branch fetch_engine.reset <= '1'; -- reset instruction fetch to restart at modified PC execute_engine.state_nxt <= BRANCHED; -- shortcut (faster than going to RESTART) @@ -922,12 +913,11 @@ begin when MEM_WAIT => -- wait for bus transaction to finish -- ------------------------------------------------------------ - if (lsu_wait_i = '0') or -- bus system has completed the transaction + if (lsu_wait_i = '0') or -- bus system has completed the transaction (if there was any) (trap_ctrl.exc_buf(exc_saccess_c) = '1') or (trap_ctrl.exc_buf(exc_laccess_c) = '1') or -- access exception (trap_ctrl.exc_buf(exc_salign_c) = '1') or (trap_ctrl.exc_buf(exc_lalign_c) = '1') or -- alignment exception (trap_ctrl.exc_buf(exc_illegal_c) = '1') then -- illegal instruction exception - if (CPU_EXTENSION_RISCV_A and (decode_aux.opcode(2) = opcode_amo_c(2))) or -- atomic operation - (execute_engine.ir(instr_opcode_msb_c-1) = '0') then -- normal load + if (CPU_EXTENSION_RISCV_A and (decode_aux.opcode(2) = opcode_amo_c(2))) or (decode_aux.opcode(5) = '0') then -- atomic operation / normal load ctrl_nxt.rf_wb_en <= '1'; -- allow write-back to register file (won't happen in case of exception) end 
if; execute_engine.state_nxt <= DISPATCH; @@ -939,32 +929,24 @@ begin execute_engine.state_nxt <= DISPATCH; end if; - when others => -- SYSTEM - system environment operation; no effect if illegal instruction + when others => -- SYSTEM - CSR/ENVIRONMENT operation; no effect if illegal instruction -- ------------------------------------------------------------ - if (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) and (trap_ctrl.exc_buf(exc_illegal_c) = '0') then -- non-illegal ENVIRONMENT - -- three LSBs are sufficient to distinguish environment instructions -- - if (execute_engine.ir(instr_funct12_lsb_c+2 downto instr_funct12_lsb_c) = "000") then - trap_ctrl.ecall <= '1'; -- ecall - end if; - if (execute_engine.ir(instr_funct12_lsb_c+2 downto instr_funct12_lsb_c) = "001") then - trap_ctrl.ebreak <= '1'; -- ebreak - end if; - if (execute_engine.ir(instr_funct12_lsb_c+2 downto instr_funct12_lsb_c) = "010") then - execute_engine.state_nxt <= TRAP_EXIT; -- xret - elsif (execute_engine.ir(instr_funct12_lsb_c+2 downto instr_funct12_lsb_c) = "101") then - execute_engine.state_nxt <= SLEEP; -- wfi - else - execute_engine.state_nxt <= DISPATCH; -- default - end if; - else -- CSR ACCESS - no state change if illegal instruction - if (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_csrrw_c) or -- CSRRW: always write CSR - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_csrrwi_c) or -- CSRRWI: always write CSR - (decode_aux.rs1_zero = '0') then -- CSRR(S/C)(I): write CSR if rs1/imm5 is NOT zero - csr.we_nxt <= '1'; - end if; - ctrl_nxt.rf_wb_en <= '1'; -- valid RF write-back - execute_engine.state_nxt <= DISPATCH; + execute_engine.state_nxt <= DISPATCH; -- default + if (funct3_v = funct3_env_c) and (trap_ctrl.exc_buf(exc_illegal_c) = '0') then -- non-illegal ENVIRONMENT + case execute_engine.ir(instr_funct12_lsb_c+2 downto instr_funct12_lsb_c) is -- three LSBs are sufficient here + when "000" => 
trap_ctrl.ecall <= '1'; -- ecall + when "001" => trap_ctrl.ebreak <= '1'; -- ebreak + when "010" => execute_engine.state_nxt <= TRAP_EXIT; -- xret + when "101" => execute_engine.state_nxt <= SLEEP; -- wfi + when others => execute_engine.state_nxt <= DISPATCH; -- illegal or CSR operation + end case; + end if; + -- always write to CSR (if CSR instruction); ENVIRONMENT operations have rs1/imm5 = zero so this won't happen then -- + if (funct3_v = funct3_csrrw_c) or (funct3_v = funct3_csrrwi_c) or (execute_engine.ir(instr_rs1_msb_c downto instr_rs1_lsb_c) /= "00000") then + csr.we_nxt <= '1'; -- CSRRW[I]: always write CSR; CSRR[S/C][I]: write CSR if rs1/imm5 is NOT zero end if; + -- always write to RF; ENVIRONMENT operations have rd = zero so this does not hurt -- + ctrl_nxt.rf_wb_en <= '1'; -- won't happen if exception end case; end process execute_engine_fsm_comb; @@ -974,7 +956,7 @@ begin -- ------------------------------------------------------------------------------------------- -- register file -- - ctrl_o.rf_wb_en <= ctrl.rf_wb_en and (not or_reduce_f(trap_ctrl.exc_buf)); -- inhibit write-back if exception + ctrl_o.rf_wb_en <= ctrl.rf_wb_en and (not trap_ctrl.exc_fire); -- inhibit write-back if exception ctrl_o.rf_rs1 <= execute_engine.ir(instr_rs1_msb_c downto instr_rs1_lsb_c); ctrl_o.rf_rs2 <= execute_engine.ir(instr_rs2_msb_c downto instr_rs2_lsb_c); ctrl_o.rf_rd <= execute_engine.ir(instr_rd_msb_c downto instr_rd_lsb_c); @@ -990,7 +972,7 @@ begin ctrl_o.lsu_req <= ctrl.lsu_req; ctrl_o.lsu_rw <= ctrl.lsu_rw; ctrl_o.lsu_mo_we <= '1' when (execute_engine.state = MEM_REQ) else '0'; -- write memory output registers (data & address) - ctrl_o.lsu_fence <= ctrl.lsu_fence; -- fence(.i) + ctrl_o.lsu_fence <= ctrl.lsu_fence; ctrl_o.lsu_priv <= csr.mstatus_mpp when (csr.mstatus_mprv = '1') else csr.privilege_eff; -- effective privilege level for loads/stores in M-mode -- instruction word bit fields -- ctrl_o.ir_funct3 <= execute_engine.ir(instr_funct3_msb_c downto 
instr_funct3_lsb_c); @@ -1027,7 +1009,7 @@ begin -- CSR Access Check ----------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - csr_check: process(execute_engine.ir, decode_aux.rs1_zero, csr, debug_ctrl) + csr_check: process(execute_engine.ir, debug_ctrl.running, csr) variable csr_addr_v : std_ulogic_vector(11 downto 0); begin -- CSR address right from the instruction word -- @@ -1100,7 +1082,7 @@ begin if (csr_addr_v(11 downto 10) = "11") and -- CSR is read-only ((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_csrrw_c) or -- will always write to CSR (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_csrrwi_c) or -- will always write to CSR - (decode_aux.rs1_zero = '0')) then -- clear/set instructions: write to CSR only if rs1/imm5 is NOT zero + (execute_engine.ir(instr_rs1_msb_c downto instr_rs1_lsb_c) /= "00000")) then -- clear/set instructions: write to CSR only if rs1/imm5 is NOT zero csr_valid(1) <= '0'; -- invalid access else csr_valid(1) <= '1'; -- access granted @@ -1112,9 +1094,9 @@ begin if (csr_addr_v(11 downto 2) = csr_dcsr_c(11 downto 2)) and -- debug-mode-only CSR (dcsr, dpc, dscratch)? CPU_EXTENSION_RISCV_Sdext and (debug_ctrl.running = '0') then -- debug-mode implemented and not running? csr_valid(0) <= '0'; -- invalid access - elsif (csr_addr_v(11 downto 8) = csr_cycle_c(11 downto 8)) and -- user-mode counter access - CPU_EXTENSION_RISCV_Zicntr and CPU_EXTENSION_RISCV_U and (csr.privilege_eff = '0') and -- any user-mode counters available and in user-mode? - (((csr_addr_v(1 downto 0) = csr_cycle_c(1 downto 0)) and (csr.mcounteren_cy = '0')) or -- illegal access to cycle + elsif CPU_EXTENSION_RISCV_Zicntr and CPU_EXTENSION_RISCV_U and (csr.privilege_eff = '0') and -- any user-mode counters available and in user-mode? 
+ (csr_addr_v(11 downto 8) = csr_cycle_c(11 downto 8)) and -- user-mode counter access + (((csr_addr_v(1 downto 0) = csr_cycle_c(1 downto 0)) and (csr.mcounteren_cy = '0')) or -- illegal access to cycle ((csr_addr_v(1 downto 0) = csr_instret_c(1 downto 0)) and (csr.mcounteren_ir = '0'))) then -- illegal access to instret csr_valid(0) <= '0'; -- invalid access elsif (csr_addr_v(9 downto 8) /= "00") and (csr.privilege_eff = '0') then -- invalid privilege level @@ -1129,55 +1111,55 @@ begin -- Illegal Instruction Check -------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- illegal_check: process(execute_engine, decode_aux, csr, csr_valid, debug_ctrl) + variable f7_v : std_ulogic_vector(6 downto 0); + variable f3_v : std_ulogic_vector(2 downto 0); begin + -- shortcuts -- + f7_v := execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c); -- funct7 + f3_v := execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c); -- funct3 + + -- check opcode-groups -- illegal_cmd <= '0'; -- default case decode_aux.opcode is - when opcode_lui_c | opcode_auipc_c | opcode_jal_c => + when opcode_lui_c | opcode_auipc_c | opcode_jal_c => -- U-instruction type illegal_cmd <= '0'; -- all encodings are valid - when opcode_jalr_c => - case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is + when opcode_jalr_c => -- unconditional jump-and-link + case f3_v is when "000" => illegal_cmd <= '0'; when others => illegal_cmd <= '1'; end case; - when opcode_branch_c => - case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is + when opcode_branch_c => -- conditional branch + case f3_v is when funct3_beq_c | funct3_bne_c | funct3_blt_c | funct3_bge_c | funct3_bltu_c | funct3_bgeu_c => illegal_cmd <= '0'; when others => illegal_cmd <= '1'; end case; - when opcode_load_c => - case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is + when 
opcode_load_c => -- memory load + case f3_v is when funct3_lb_c | funct3_lh_c | funct3_lw_c | funct3_lbu_c | funct3_lhu_c => illegal_cmd <= '0'; when others => illegal_cmd <= '1'; end case; - when opcode_store_c => - case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is + when opcode_store_c => -- memory store + case f3_v is when funct3_sb_c | funct3_sh_c | funct3_sw_c => illegal_cmd <= '0'; when others => illegal_cmd <= '1'; end case; - when opcode_amo_c => - if CPU_EXTENSION_RISCV_A and (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "010") and - (execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c+3) = "0001") then -- LR.W/SC.W + when opcode_amo_c => -- atomic memory operation (LR/SC) + if CPU_EXTENSION_RISCV_A and (f3_v = "010") and (f7_v(6 downto 3) = "0001") then -- LR.W/SC.W illegal_cmd <= '0'; else illegal_cmd <= '1'; end if; - when opcode_alu_c => - if ((((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_subadd_c) or (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_sr_c)) and - (execute_engine.ir(instr_funct7_msb_c-2 downto instr_funct7_lsb_c) = "00000") and (execute_engine.ir(instr_funct7_msb_c) = '0')) or - (((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_sll_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_slt_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_sltu_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_xor_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_or_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_and_c)) and - (execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0000000"))) or -- valid base ALU instruction? 
+ when opcode_alu_c => -- register-register ALU operation + if ((((f3_v = funct3_subadd_c) or (f3_v = funct3_sr_c)) and (f7_v(4 downto 0) = "00000") and (f7_v(6) = '0')) or + (((f3_v = funct3_sll_c) or (f3_v = funct3_slt_c) or (f3_v = funct3_sltu_c) or (f3_v = funct3_xor_c) or (f3_v = funct3_or_c) or (f3_v = funct3_and_c)) and (f7_v = "0000000")) + ) or -- valid base ALU instruction? ((CPU_EXTENSION_RISCV_M or CPU_EXTENSION_RISCV_Zmmul) and (decode_aux.is_m_mul = '1')) or -- valid MUL instruction? (CPU_EXTENSION_RISCV_M and (decode_aux.is_m_div = '1')) or -- valid DIV instruction? (CPU_EXTENSION_RISCV_B and (decode_aux.is_b_reg = '1')) or -- valid BITMANIP register instruction? @@ -1187,33 +1169,28 @@ begin illegal_cmd <= '1'; end if; - when opcode_alui_c => - if ((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_subadd_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_slt_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_sltu_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_xor_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_or_c) or - (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_and_c) or - ((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_sll_c) and (execute_engine.ir(instr_funct7_msb_c downto instr_funct7_lsb_c) = "0000000")) or - ((execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_sr_c) and - ((execute_engine.ir(instr_funct7_msb_c-2 downto instr_funct7_lsb_c) = "00000") and (execute_engine.ir(instr_funct7_msb_c) = '0')))) or -- valid base ALUI instruction? 
+ when opcode_alui_c => -- register-immediate ALU operation + if ((f3_v = funct3_subadd_c) or (f3_v = funct3_slt_c) or (f3_v = funct3_sltu_c) or (f3_v = funct3_xor_c) or (f3_v = funct3_or_c) or (f3_v = funct3_and_c) or + ((f3_v = funct3_sll_c) and (f7_v = "0000000")) or + ((f3_v = funct3_sr_c) and ((f7_v(4 downto 0) = "00000") and (f7_v(6) = '0'))) + ) or -- valid base ALUI instruction? (CPU_EXTENSION_RISCV_B and (decode_aux.is_b_imm = '1')) then -- valid BITMANIP immediate instruction? illegal_cmd <= '0'; else illegal_cmd <= '1'; end if; - when opcode_fence_c => - case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is + when opcode_fence_c => -- memory ordering + case f3_v is when funct3_fence_c | funct3_fencei_c => illegal_cmd <= '0'; when others => illegal_cmd <= '1'; end case; - when opcode_system_c => - if (execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system environment - if (decode_aux.rs1_zero = '1') and (decode_aux.rd_zero = '1') then + when opcode_system_c => -- CSR and system instructions + if (f3_v = funct3_env_c) then -- system environment + if (execute_engine.ir(instr_rs1_msb_c downto instr_rs1_lsb_c) = "00000") and (execute_engine.ir(instr_rd_msb_c downto instr_rd_lsb_c) = "00000") then case execute_engine.ir(instr_funct12_msb_c downto instr_funct12_lsb_c) is - when funct12_ecall_c | funct12_ebreak_c => illegal_cmd <= '0'; -- ecall, ebreak + when funct12_ecall_c | funct12_ebreak_c => illegal_cmd <= '0'; -- ecall and ebreak are always allowed when funct12_mret_c => illegal_cmd <= (not csr.privilege) or debug_ctrl.running; -- mret allowed in (real/non-debug) M-mode only when funct12_dret_c => illegal_cmd <= not debug_ctrl.running; -- dret allowed in debug mode only when funct12_wfi_c => illegal_cmd <= (not csr.privilege) and csr.mstatus_tw; -- wfi allowed in M-mode or if TW is zero @@ -1222,20 +1199,20 @@ begin else illegal_cmd <= '1'; end if; - elsif (csr_valid /= "111") or 
(execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_csril_c) then -- invalid CSR access? + elsif (csr_valid /= "111") or (f3_v = funct3_csril_c) then -- invalid CSR access? illegal_cmd <= '1'; else illegal_cmd <= '0'; end if; - when opcode_fop_c => + when opcode_fop_c => -- floating-point operations illegal_cmd <= not bool_to_ulogic_f(CPU_EXTENSION_RISCV_Zfinx); -- valid encodings checked by FPU - when opcode_cust0_c | opcode_cust1_c => + when opcode_cust0_c | opcode_cust1_c => -- custom instructions illegal_cmd <= not bool_to_ulogic_f(CPU_EXTENSION_RISCV_Zxcfu); -- all encodings valid if CFU enable - when others => - illegal_cmd <= '1'; -- undefined/unimplemented/illegal opcode + when others => -- undefined/unimplemented/illegal opcode + illegal_cmd <= '1'; end case; end process illegal_check; @@ -1310,7 +1287,7 @@ begin -- Interrupt-Pending Buffer --------------------------------------------- -- Once triggered the interrupt line should stay active until explicitly - -- cleared by a mechanism specific to the interrupt-causing module. + -- cleared by a mechanism specific to the interrupt-causing source. 
-- ---------------------------------------------------------------------- -- RISC-V machine interrupts -- @@ -1355,6 +1332,7 @@ begin if (rstn_i = '0') then trap_ctrl.cause <= (others => '0'); elsif rising_edge(clk_i) then + trap_ctrl.cause <= (others => '0'); -- default -- standard RISC-V exceptions -- if (trap_ctrl.exc_buf(exc_iaccess_c) = '1') then trap_ctrl.cause <= trap_iaf_c; -- instruction access fault elsif (trap_ctrl.exc_buf(exc_illegal_c) = '1') then trap_ctrl.cause <= trap_iil_c; -- illegal instruction @@ -1391,7 +1369,7 @@ begin elsif (trap_ctrl.irq_buf(irq_mei_irq_c) = '1') then trap_ctrl.cause <= trap_mei_c; -- machine external interrupt (MEI) elsif (trap_ctrl.irq_buf(irq_msi_irq_c) = '1') then trap_ctrl.cause <= trap_msi_c; -- machine software interrupt (MSI) elsif (trap_ctrl.irq_buf(irq_mti_irq_c) = '1') then trap_ctrl.cause <= trap_mti_c; -- machine timer interrupt (MTI) - else trap_ctrl.cause <= (others => '0'); end if; + end if; end if; end process trap_priority; @@ -1483,7 +1461,7 @@ begin if (rstn_i = '0') then csr.addr <= (others => '0'); elsif rising_edge(clk_i) then - -- update only for actual CSR operations to reduce switching activity -- + -- update only for actual CSR operations to reduce switching activity on the CSR address net -- if (execute_engine.state = EXECUTE) and (decode_aux.opcode = opcode_system_c) then csr.addr(11 downto 10) <= execute_engine.ir(instr_imm12_lsb_c+11 downto instr_imm12_lsb_c+10); csr.addr(9 downto 8) <= replicate_f(execute_engine.ir(instr_imm12_lsb_c+8), 2); -- M-mode (11) and U-mode (00) CSRs only @@ -1743,11 +1721,11 @@ begin end if; -- ******************************************************************************** - -- Override - hardwire/terminate unimplemented registers/bits + -- Override - hardwire/terminate unavailable registers/bits -- ******************************************************************************** -- hardwired bits -- - csr.mcountinhibit(1) <= '0'; -- time[h] not implemented + 
csr.mcountinhibit(1) <= '0'; -- "time" not implemented -- no base counters -- if not CPU_EXTENSION_RISCV_Zicntr then @@ -2043,7 +2021,7 @@ begin -- **************************************************************************************************************************** --- CPU Counters (Standard Counters and Hardware Performance Monitors) +-- CPU Counters (Base Counters and Hardware Performance Monitors) -- **************************************************************************************************************************** -- Counter CSRs --------------------------------------------------------------------------- @@ -2097,7 +2075,7 @@ begin end generate; -- /cnt_gen -- read-back -- - cnt_connect: process(cnt) + cnt_readback: process(cnt) begin cnt_lo_rd <= (others => (others => '0')); cnt_hi_rd <= (others => (others => '0')); @@ -2119,7 +2097,7 @@ begin end if; end loop; end if; - end process cnt_connect; + end process cnt_readback; -- Hardware Performance Monitors (HPM) - Counter Event Configuration CSRs ----------------- @@ -2148,7 +2126,7 @@ begin if (hpmevent_we(i) = '1') then hpmevent_cfg(i) <= csr.wdata(hpmcnt_event_width_c-1 downto 0); end if; - hpmevent_cfg(i)(hpmcnt_event_tm_c) <= '0'; -- time, unused/reserved + hpmevent_cfg(i)(hpmcnt_event_tm_c) <= '0'; -- time: not available end if; end process hpmevent_reg; -- read-back -- @@ -2181,36 +2159,32 @@ begin if (rstn_i = '0') then cnt.inc <= (others => (others => '0')); elsif rising_edge(clk_i) then - cnt.inc <= (others => (others => '0')); -- default -- base counters -- - if CPU_EXTENSION_RISCV_Zicntr then - cnt.inc(0) <= (others => (cnt_event(hpmcnt_event_cy_c) and (not csr.mcountinhibit(0)) and (not debug_ctrl.running))); - cnt.inc(2) <= (others => (cnt_event(hpmcnt_event_ir_c) and (not csr.mcountinhibit(2)) and (not debug_ctrl.running))); - end if; + cnt.inc(0) <= (others => (cnt_event(hpmcnt_event_cy_c) and (not csr.mcountinhibit(0)) and (not debug_ctrl.running))); + cnt.inc(1) <= (others => 
'0'); -- time: not available + cnt.inc(2) <= (others => (cnt_event(hpmcnt_event_ir_c) and (not csr.mcountinhibit(2)) and (not debug_ctrl.running))); -- hpm counters -- - if CPU_EXTENSION_RISCV_Zihpm and (hpm_num_c > 0) then - for i in 3 to (hpm_num_c+3)-1 loop - cnt.inc(i) <= (others => (or_reduce_f(cnt_event and hpmevent_cfg(i)) and (not csr.mcountinhibit(i)) and (not debug_ctrl.running))); - end loop; - end if; + for i in 3 to 15 loop + cnt.inc(i) <= (others => (or_reduce_f(cnt_event and hpmevent_cfg(i)) and (not csr.mcountinhibit(i)) and (not debug_ctrl.running))); + end loop; end if; end process counter_event; - -- RISC-V-specific base counter events (for HPM and base counters) -- + -- RISC-V-compliant counter events -- cnt_event(hpmcnt_event_cy_c) <= '1' when (sleep_mode = '0') else '0'; -- cycle: active cycle - cnt_event(hpmcnt_event_tm_c) <= '0'; -- time: unused/reserved + cnt_event(hpmcnt_event_tm_c) <= '0'; -- time: not available cnt_event(hpmcnt_event_ir_c) <= '1' when (execute_engine.state = EXECUTE) else '0'; -- instret: retired (==executed) instruction - -- NEORV32-specific counter events (for HPM counters only) -- - cnt_event(hpmcnt_event_compr_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- executed compressed instruction - cnt_event(hpmcnt_event_wait_dis_c) <= '1' when (execute_engine.state = DISPATCH) and (issue_engine.valid = "00") else '0'; -- instruction dispatch wait cycle - cnt_event(hpmcnt_event_wait_alu_c) <= '1' when (execute_engine.state = ALU_WAIT) else '0'; -- multi-cycle ALU co-processor wait cycle - cnt_event(hpmcnt_event_branch_c) <= '1' when (execute_engine.state = BRANCH) else '0'; -- executed branch instruction - cnt_event(hpmcnt_event_branched_c) <= '1' when (execute_engine.state = BRANCHED) else '0'; -- control flow transfer - cnt_event(hpmcnt_event_load_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '0') else '0'; -- executed load operation - cnt_event(hpmcnt_event_store_c) 
<= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '1') else '0'; -- executed store operation - cnt_event(hpmcnt_event_wait_lsu_c) <= '1' when (ctrl.lsu_req = '0') and (execute_engine.state = MEM_WAIT) else '0'; -- load/store unit memory wait cycle - cnt_event(hpmcnt_event_trap_c) <= '1' when (trap_ctrl.env_enter = '1') else '0'; -- entered trap + -- NEORV32-specific counter events -- + cnt_event(hpmcnt_event_compr_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- executed compressed instruction + cnt_event(hpmcnt_event_wait_dis_c) <= '1' when (execute_engine.state = DISPATCH) and (issue_engine.valid = "00") else '0'; -- instruction dispatch wait cycle + cnt_event(hpmcnt_event_wait_alu_c) <= '1' when (execute_engine.state = ALU_WAIT) else '0'; -- multi-cycle ALU co-processor wait cycle + cnt_event(hpmcnt_event_branch_c) <= '1' when (execute_engine.state = BRANCH) else '0'; -- executed branch instruction + cnt_event(hpmcnt_event_branched_c) <= '1' when (execute_engine.state = BRANCHED) else '0'; -- control flow transfer + cnt_event(hpmcnt_event_load_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '0') else '0'; -- executed load operation + cnt_event(hpmcnt_event_store_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '1') else '0'; -- executed store operation + cnt_event(hpmcnt_event_wait_lsu_c) <= '1' when (ctrl.lsu_req = '0') and (execute_engine.state = MEM_WAIT) else '0'; -- load/store unit memory wait cycle + cnt_event(hpmcnt_event_trap_c) <= '1' when (trap_ctrl.env_enter = '1') else '0'; -- entered trap -- **************************************************************************************************************************** @@ -2240,7 +2214,7 @@ begin end process debug_control; -- debug mode entry triggers -- - debug_ctrl.trig_hw <= trap_ctrl.hwtrig and (not debug_ctrl.running) and csr.tdata1_action and csr.tdata1_dmode; -- enter debug mode by HW trigger module request (only valid if dmode = 1) + 
debug_ctrl.trig_hw <= trap_ctrl.hwtrig and (not debug_ctrl.running) and csr.tdata1_action and csr.tdata1_dmode; -- enter debug mode by HW trigger module request if dmode is set debug_ctrl.trig_break <= trap_ctrl.ebreak and (debug_ctrl.running or -- re-enter debug mode (( csr.privilege) and csr.dcsr_ebreakm) or -- enabled goto-debug-mode in machine mode on "ebreak" ((not csr.privilege) and csr.dcsr_ebreaku)); -- enabled goto-debug-mode in user mode on "ebreak" diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index 143e3af43..5eb0158c7 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -29,7 +29,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100304"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100305"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width @@ -137,8 +137,8 @@ package neorv32_package is -- bus response -- type bus_rsp_t is record data : std_ulogic_vector(31 downto 0); -- read data, valid if ack=1 - ack : std_ulogic; -- set if access acknowledge, single-shot, has priority over err - err : std_ulogic; -- set if access error, single-shot + ack : std_ulogic; -- set if access acknowledge, single-shot + err : std_ulogic; -- set if access error, single-shot, has priority over ack end record; -- source (request) termination -- diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index bb7e96f77..38165f8cf 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -89,7 +89,7 @@ entity neorv32_top is XBUS_CACHE_BLOCK_SIZE : natural range 1 to 2**16 := 32; -- x-cache: block size in bytes (min 4), has to be a power of 2 -- 
Execute in-place module (XIP) -- - XIP_EN : boolean := false; -- implement execute in place module (XIP)? + XIP_EN : boolean := false; -- implement execute in-place module (XIP)? XIP_CACHE_EN : boolean := false; -- implement XIP cache? XIP_CACHE_NUM_BLOCKS : natural range 1 to 256 := 8; -- number of blocks (min 1), has to be a power of 2 XIP_CACHE_BLOCK_SIZE : natural range 1 to 2**16 := 256; -- block size in bytes (min 4), has to be a power of 2 @@ -325,35 +325,35 @@ begin -- show SoC configuration -- assert false report "[NEORV32] Processor Configuration: " & - cond_sel_string_f(MEM_INT_IMEM_EN, "IMEM ", "") & - cond_sel_string_f(MEM_INT_DMEM_EN, "DMEM ", "") & - cond_sel_string_f(INT_BOOTLOADER_EN, "BOOTROM ", "") & - cond_sel_string_f(ICACHE_EN, "I-CACHE ", "") & - cond_sel_string_f(DCACHE_EN, "D-CACHE ", "") & - cond_sel_string_f(XBUS_EN, "XBUS ", "") & - cond_sel_string_f(XBUS_EN and XBUS_CACHE_EN, "X-CACHE ", "") & - cond_sel_string_f(XIP_EN, "XIP ", "") & - cond_sel_string_f(XIP_EN and XIP_CACHE_EN, "XIP-CACHE ", "") & - cond_sel_string_f(io_gpio_en_c, "GPIO ", "") & - cond_sel_string_f(IO_MTIME_EN, "MTIME ", "") & - cond_sel_string_f(IO_UART0_EN, "UART0 ", "") & - cond_sel_string_f(IO_UART1_EN, "UART1 ", "") & - cond_sel_string_f(IO_SPI_EN, "SPI ", "") & - cond_sel_string_f(IO_SDI_EN, "SDI ", "") & - cond_sel_string_f(IO_TWI_EN, "TWI ", "") & - cond_sel_string_f(io_pwm_en_c, "PWM ", "") & - cond_sel_string_f(IO_WDT_EN, "WDT ", "") & - cond_sel_string_f(IO_TRNG_EN, "TRNG ", "") & - cond_sel_string_f(IO_CFS_EN, "CFS ", "") & - cond_sel_string_f(IO_NEOLED_EN, "NEOLED ", "") & - cond_sel_string_f(io_xirq_en_c, "XIRQ ", "") & - cond_sel_string_f(IO_GPTMR_EN, "GPTMR ", "") & - cond_sel_string_f(IO_ONEWIRE_EN, "ONEWIRE ", "") & - cond_sel_string_f(IO_DMA_EN, "DMA ", "") & - cond_sel_string_f(IO_SLINK_EN, "SLINK ", "") & - cond_sel_string_f(IO_CRC_EN, "CRC ", "") & - cond_sel_string_f(io_sysinfo_en_c, "SYSINFO ", "") & - cond_sel_string_f(ON_CHIP_DEBUGGER_EN, "OCD 
", "") & + cond_sel_string_f(MEM_INT_IMEM_EN, "IMEM ", "") & + cond_sel_string_f(MEM_INT_DMEM_EN, "DMEM ", "") & + cond_sel_string_f(INT_BOOTLOADER_EN, "BOOTROM ", "") & + cond_sel_string_f(ICACHE_EN, "I-CACHE ", "") & + cond_sel_string_f(DCACHE_EN, "D-CACHE ", "") & + cond_sel_string_f(XBUS_EN, "XBUS ", "") & + cond_sel_string_f(XBUS_EN and XBUS_CACHE_EN, "XBUS-CACHE ", "") & + cond_sel_string_f(XIP_EN, "XIP ", "") & + cond_sel_string_f(XIP_EN and XIP_CACHE_EN, "XIP-CACHE ", "") & + cond_sel_string_f(io_gpio_en_c, "GPIO ", "") & + cond_sel_string_f(IO_MTIME_EN, "MTIME ", "") & + cond_sel_string_f(IO_UART0_EN, "UART0 ", "") & + cond_sel_string_f(IO_UART1_EN, "UART1 ", "") & + cond_sel_string_f(IO_SPI_EN, "SPI ", "") & + cond_sel_string_f(IO_SDI_EN, "SDI ", "") & + cond_sel_string_f(IO_TWI_EN, "TWI ", "") & + cond_sel_string_f(io_pwm_en_c, "PWM ", "") & + cond_sel_string_f(IO_WDT_EN, "WDT ", "") & + cond_sel_string_f(IO_TRNG_EN, "TRNG ", "") & + cond_sel_string_f(IO_CFS_EN, "CFS ", "") & + cond_sel_string_f(IO_NEOLED_EN, "NEOLED ", "") & + cond_sel_string_f(io_xirq_en_c, "XIRQ ", "") & + cond_sel_string_f(IO_GPTMR_EN, "GPTMR ", "") & + cond_sel_string_f(IO_ONEWIRE_EN, "ONEWIRE ", "") & + cond_sel_string_f(IO_DMA_EN, "DMA ", "") & + cond_sel_string_f(IO_SLINK_EN, "SLINK ", "") & + cond_sel_string_f(IO_CRC_EN, "CRC ", "") & + cond_sel_string_f(io_sysinfo_en_c, "SYSINFO ", "") & + cond_sel_string_f(ON_CHIP_DEBUGGER_EN, "OCD ", "") & "" severity note; diff --git a/sw/example/processor_check/main.c b/sw/example/processor_check/main.c index a4447ab29..3baf40702 100644 --- a/sw/example/processor_check/main.c +++ b/sw/example/processor_check/main.c @@ -561,12 +561,14 @@ int main() { tmp_a = trap_cnt; // current number of traps // try executing some illegal instructions - asm volatile (".align 4"); - asm volatile (".word 0x0e00202f"); // amoswap.w x0, x0, (x0) + asm volatile (".word 0x58007053"); // unsupported fsqrt.s x0, x0 + asm volatile (".word 0x0e00202f"); // 
unsupported amoswap.w x0, x0, (x0) asm volatile (".word 0x34004073"); // illegal CSR access funct3 (using mscratch) asm volatile (".word 0x30200077"); // mret with illegal opcode asm volatile (".word 0x3020007f"); // mret with illegal opcode asm volatile (".word 0x7b200073"); // dret outside of debug mode + asm volatile (".word 0x00000079"); // NOTE(review): opcode bits [6:0] = 0x79, not SYSTEM (0x73) - not an ecall encoding; confirm intended word + asm volatile (".word 0x00008073"); // ecall with rs1 != 0 (rs1 = x1, rd = x0) asm volatile (".word 0x7b300073"); // illegal system funct12 asm volatile (".word 0xfe000033"); // illegal add funct7 asm volatile (".word 0x80002163"); // illegal branch funct3 (misaligned DST if C not available) @@ -574,7 +576,7 @@ int main() { asm volatile (".word 0xfe002fe3"); // illegal store funct3 if (neorv32_cpu_csr_read(CSR_MISA) & (1< - /**********************************************************************//** - * @name Register mappings + * @name Register aliases (physical names and ABI names) **************************************************************************/ asm ( ".set reg_x0, 0 \n" @@ -100,11 +99,6 @@ asm ( #define CUSTOM_INSTR_R2_TYPE(funct7, funct5, rs1, funct3, opcode) \ ({ \ uint32_t __return; \ - asm volatile ( \ - "" \ - : [output] "=r" (__return) \ - : [input_i] "r" (rs1) \ - ); \ asm volatile( \ ".word ( \ (((" #funct7 ") & 0x7f) << 25) | \ @@ -127,12 +121,6 @@ asm ( #define CUSTOM_INSTR_R3_TYPE(funct7, rs2, rs1, funct3, opcode) \ ({ \ uint32_t __return; \ - asm volatile ( \ - "" \ - : [output] "=r" (__return) \ - : [input_i] "r" (rs1), \ - [input_j] "r" (rs2) \ - ); \ asm volatile ( \ ".word ( \ (((" #funct7 ") & 0x7f) << 25) | \ @@ -156,13 +144,6 @@ asm ( #define CUSTOM_INSTR_R4_TYPE(rs3, rs2, rs1, funct3, opcode) \ ({ \ uint32_t __return; \ - asm volatile ( \ - "" \ - : [output] "=r" (__return) \ - : [input_i] "r" (rs1), \ - [input_j] "r" (rs2), \ - [input_k] "r" (rs3) \ - ); \ asm volatile ( \ ".word ( \ ((( reg_%3 ) & 0x1f) << 27) | \ @@ -187,11 +168,6 @@ asm ( #define CUSTOM_INSTR_I_TYPE(imm12, rs1, funct3, 
opcode) \ ({ \ uint32_t __return; \ - asm volatile ( \ - "" \ - : [output] "=r" (__return) \ - : [input_i] "r" (rs1) \ - ); \ asm volatile ( \ ".word ( \ (((" #imm12 ") & 0xfff) << 20) | \ @@ -212,12 +188,6 @@ asm ( **************************************************************************/ #define CUSTOM_INSTR_S_TYPE(imm12, rs2, rs1, funct3, opcode) \ ({ \ - asm volatile ( \ - "" \ - : \ - : [input_i] "r" (rs1), \ - [input_j] "r" (rs2) \ - ); \ asm volatile ( \ ".word ( \ ((((" #imm12 ") >> 5) & 0x7f) << 25) | \