From 1dbc6ab2910388033b0adab1ada82d5deffbb915 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:29:15 +0200 Subject: [PATCH 01/10] [rtl] add replication function to package --- rtl/core/neorv32_package.vhd | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index 377654684..7f01fac0b 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -29,7 +29,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100100"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100101"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width @@ -699,6 +699,7 @@ package neorv32_package is function bswap_f(input : std_ulogic_vector) return std_ulogic_vector; function popcount_f(input : std_ulogic_vector) return natural; function leading_zeros_f(input : std_ulogic_vector) return natural; + function replicate_f(input : std_ulogic; num : natural) return std_ulogic_vector; impure function mem32_init_f(init : mem32_t; depth : natural) return mem32_t; function print_version_f(version : std_ulogic_vector(31 downto 0)) return string; @@ -1153,6 +1154,15 @@ package body neorv32_package is return cnt_v; end function leading_zeros_f; + -- Replicate input bit num times ---------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + function replicate_f(input : std_ulogic; num : natural) return std_ulogic_vector is + variable tmp_v : std_ulogic_vector(num-1 downto 0); + begin + tmp_v := (others => input); + return tmp_v; + end function replicate_f; + -- Initialize mem32_t array from another mem32_t array ------------------------------------ -- ------------------------------------------------------------------------------------------- impure function mem32_init_f(init : mem32_t; depth : natural) return mem32_t is From d1e677a0160b2216461aee251ac1c85cd2b19ae3 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:29:34 +0200 Subject: [PATCH 02/10] [rtl] cache: remove unused signal assignments --- rtl/core/neorv32_cache.vhd | 2 -- 1 file changed, 2 deletions(-) diff --git a/rtl/core/neorv32_cache.vhd b/rtl/core/neorv32_cache.vhd index 260a4b4eb..81be5d55c 100644 --- a/rtl/core/neorv32_cache.vhd +++ b/rtl/core/neorv32_cache.vhd @@ -229,9 +229,7 @@ begin -- direct accesses not implemented -- direct_acc_disable: if not UC_ENABLE generate - dir_acc_q <= '0'; dir_req_q <= req_terminate_c; - dir_rsp_q <= rsp_terminate_c; host_rsp_o <= cache_rsp; end generate; From a2663ba50e2dc65bbb67b50f9b25e3de5707ef09 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:29:52 +0200 Subject: [PATCH 03/10] [rtl] pmp: minor edits --- rtl/core/neorv32_cpu_pmp.vhd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rtl/core/neorv32_cpu_pmp.vhd b/rtl/core/neorv32_cpu_pmp.vhd index dcaf3b077..20abdac90 100644 --- a/rtl/core/neorv32_cpu_pmp.vhd +++ b/rtl/core/neorv32_cpu_pmp.vhd @@ -2,7 +2,7 @@ -- NEORV32 CPU - Physical Memory Protection Unit (RISC-V "Smpmp" Extension) -- -- -------------------------------------------------------------------------------- -- -- Compatible to the RISC-V PMP privilege architecture specifications. Granularity -- --- and supported modes can be constrained via generics to reduce area consumption. -- +-- and supported modes can be constrained via generics to reduce area requirements. -- -- -------------------------------------------------------------------------------- -- -- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- -- Copyright (c) NEORV32 contributors. -- @@ -141,12 +141,12 @@ begin elsif rising_edge(clk_i) then -- configuration -- - if (csr.we_cfg(i/4) = '1') and (csr.cfg(i)(7) = '0') then -- unlocked write access - csr.cfg(i)(cfg_r_c) <= csr_wdata_i((i mod 4)*8+0); -- R (read) - csr.cfg(i)(cfg_w_c) <= csr_wdata_i((i mod 4)*8+1); -- W (write) - csr.cfg(i)(cfg_x_c) <= csr_wdata_i((i mod 4)*8+2); -- X (execute) + if (csr.we_cfg(i/4) = '1') and (csr.cfg(i)(cfg_l_c) = '0') then -- unlocked write access + csr.cfg(i)(cfg_r_c) <= csr_wdata_i((i mod 4)*8+cfg_r_c); -- R (read) + csr.cfg(i)(cfg_w_c) <= csr_wdata_i((i mod 4)*8+cfg_w_c); -- W (write) + csr.cfg(i)(cfg_x_c) <= csr_wdata_i((i mod 4)*8+cfg_x_c); -- X (execute) -- A (mode) -- - mode_v := csr_wdata_i((i mod 4)*8+4 downto (i mod 4)*8+3); + mode_v := csr_wdata_i((i mod 4)*8+cfg_ah_c downto (i mod 4)*8+cfg_al_c); if ((mode_v = mode_tor_c) and (not TOR_EN)) or -- TOR mode not implemented ((mode_v = mode_na4_c) and (not NAP_EN)) or -- NA4 mode not implemented ((mode_v = mode_napot_c) and (not NAP_EN)) or -- NAPOT mode not implemented @@ -158,7 +158,7 @@ begin -- csr.cfg(i)(cfg_rl_c) <= '0'; -- reserved csr.cfg(i)(cfg_rh_c) <= '0'; -- reserved - csr.cfg(i)(cfg_l_c) <= csr_wdata_i((i mod 4)*8+7); -- L (locked) + csr.cfg(i)(cfg_l_c) <= csr_wdata_i((i mod 4)*8+cfg_l_c); -- L (locked) end if; -- address -- From 65378dc8e3eb64995e38394e1a8f89141800eece Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:30:21 +0200 Subject: [PATCH 04/10] [sw/lib] minor relocation --- sw/lib/include/neorv32.h | 42 ++++++++++++++++++++++++++++++++++++ sw/lib/include/neorv32_aux.h | 42 ------------------------------------ 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/sw/lib/include/neorv32.h b/sw/lib/include/neorv32.h index 9210233e2..6b7ca805e 100644 --- a/sw/lib/include/neorv32.h +++ b/sw/lib/include/neorv32.h @@ -189,6 +189,48 @@ extern "C" { /**@}*/ +/**********************************************************************//** + * Processor clock prescaler select (relative to processor's main clock) + **************************************************************************/ +/**@{*/ +enum NEORV32_CLOCK_PRSC_enum { + CLK_PRSC_2 = 0, /**< CPU_CLK / 2 */ + CLK_PRSC_4 = 1, /**< CPU_CLK / 4 */ + CLK_PRSC_8 = 2, /**< CPU_CLK / 8 */ + CLK_PRSC_64 = 3, /**< CPU_CLK / 64 */ + CLK_PRSC_128 = 4, /**< CPU_CLK / 128 */ + CLK_PRSC_1024 = 5, /**< CPU_CLK / 1024 */ + CLK_PRSC_2048 = 6, /**< CPU_CLK / 2048 */ + CLK_PRSC_4096 = 7 /**< CPU_CLK / 4096 */ +}; +/**@}*/ + + +/**********************************************************************//** + * @name Subword-access helper + **************************************************************************/ +/**@{*/ +/** @name 64-bit */ +typedef union { + uint64_t uint64; + uint32_t uint32[sizeof(uint64_t)/sizeof(uint32_t)]; + uint16_t uint16[sizeof(uint64_t)/sizeof(uint16_t)]; + uint8_t uint8[ sizeof(uint64_t)/sizeof(uint8_t)]; +} subwords64_t; +/** @name 32-bit */ +typedef union { + uint32_t uint32[sizeof(uint32_t)/sizeof(uint32_t)]; + uint16_t uint16[sizeof(uint32_t)/sizeof(uint16_t)]; + uint8_t uint8[ sizeof(uint32_t)/sizeof(uint8_t)]; +} subwords32_t; +/** @name 16-bit */ +typedef union { + uint16_t uint16[sizeof(uint16_t)/sizeof(uint16_t)]; + uint8_t uint8[ sizeof(uint16_t)/sizeof(uint8_t)]; +} subwords16_t; +/**@}*/ + + // ---------------------------------------------------------------------------- // Include all system header files // ---------------------------------------------------------------------------- diff --git a/sw/lib/include/neorv32_aux.h b/sw/lib/include/neorv32_aux.h index a6600861f..feaae2f23 100644 --- a/sw/lib/include/neorv32_aux.h +++ b/sw/lib/include/neorv32_aux.h @@ -27,48 +27,6 @@ /**@}*/ -/**********************************************************************//** - * Processor clock prescaler select (relative to processor's main clock) - **************************************************************************/ -/**@{*/ -enum NEORV32_CLOCK_PRSC_enum { - CLK_PRSC_2 = 0, /**< CPU_CLK / 2 */ - CLK_PRSC_4 = 1, /**< CPU_CLK / 4 */ - CLK_PRSC_8 = 2, /**< CPU_CLK / 8 */ - CLK_PRSC_64 = 3, /**< CPU_CLK / 64 */ - CLK_PRSC_128 = 4, /**< CPU_CLK / 128 */ - CLK_PRSC_1024 = 5, /**< CPU_CLK / 1024 */ - CLK_PRSC_2048 = 6, /**< CPU_CLK / 2048 */ - CLK_PRSC_4096 = 7 /**< CPU_CLK / 4096 */ -}; -/**@}*/ - - -/**********************************************************************//** - * @name Subword-access helper - **************************************************************************/ -/**@{*/ -/** @name 64-bit */ -typedef union { - uint64_t uint64; - uint32_t uint32[sizeof(uint64_t)/sizeof(uint32_t)]; - uint16_t uint16[sizeof(uint64_t)/sizeof(uint16_t)]; - uint8_t uint8[ sizeof(uint64_t)/sizeof(uint8_t)]; -} subwords64_t; -/** @name 32-bit */ -typedef union { - uint32_t uint32[sizeof(uint32_t)/sizeof(uint32_t)]; - uint16_t uint16[sizeof(uint32_t)/sizeof(uint16_t)]; - uint8_t uint8[ sizeof(uint32_t)/sizeof(uint8_t)]; -} subwords32_t; -/** @name 16-bit */ -typedef union { - uint16_t uint16[sizeof(uint16_t)/sizeof(uint16_t)]; - uint8_t uint8[ sizeof(uint16_t)/sizeof(uint8_t)]; -} subwords16_t; -/**@}*/ - - /**********************************************************************//** * @name Date and time struct **************************************************************************/ From e057183d3ed8803e2a261a8b5b69982ab82455c2 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:41:13 +0200 Subject: [PATCH 05/10] [rtl] cpu lsu: code cleanup --- rtl/core/neorv32_cpu_lsu.vhd | 49 +++++++++++------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/rtl/core/neorv32_cpu_lsu.vhd b/rtl/core/neorv32_cpu_lsu.vhd index 81dfe4030..1e47a5202 100644 --- a/rtl/core/neorv32_cpu_lsu.vhd +++ b/rtl/core/neorv32_cpu_lsu.vhd @@ -69,8 +69,7 @@ begin end if; end process mem_addr_reg; - -- address output -- - bus_req_o.addr <= mar; + bus_req_o.addr <= mar; -- bus address mar_o <= mar; -- for MTVAL CSR @@ -98,10 +97,7 @@ begin end if; end process mem_type_reg; - -- source identifier -- - bus_req_o.src <= '0'; -- 0 = data access - - -- data fence -- + bus_req_o.src <= '0'; -- 0 = data access bus_req_o.fence <= ctrl_i.lsu_fence; -- this is valid without STB being set @@ -116,23 +112,15 @@ begin if (ctrl_i.lsu_mo_we = '1') then case ctrl_i.ir_funct3(1 downto 0) is when "00" => -- byte - bus_req_o.data(7 downto 0) <= wdata_i(7 downto 0); - bus_req_o.data(15 downto 8) <= wdata_i(7 downto 0); - bus_req_o.data(23 downto 16) <= wdata_i(7 downto 0); - bus_req_o.data(31 downto 24) <= wdata_i(7 downto 0); - bus_req_o.ben <= (others => '0'); + bus_req_o.data <= wdata_i(7 downto 0) & wdata_i(7 downto 0) & wdata_i(7 downto 0) & wdata_i(7 downto 0); + bus_req_o.ben <= (others => '0'); bus_req_o.ben(to_integer(unsigned(addr_i(1 downto 0)))) <= '1'; when "01" => -- half-word - bus_req_o.data(15 downto 0) <= wdata_i(15 downto 0); - bus_req_o.data(31 downto 16) <= wdata_i(15 downto 0); - if (addr_i(1) = '0') then - bus_req_o.ben <= "0011"; -- low half-word - else - bus_req_o.ben <= "1100"; -- high half-word - end if; + bus_req_o.data <= wdata_i(15 downto 0) & wdata_i(15 downto 0); + bus_req_o.ben <= addr_i(1) & addr_i(1) & (not addr_i(1)) & (not addr_i(1)); when others => -- word bus_req_o.data <= wdata_i; - bus_req_o.ben <= "1111"; + bus_req_o.ben <= (others => '1'); end case; end if; end if; @@ -146,36 +134,29 @@ begin if (rstn_i = '0') then rdata_o <= (others => '0'); elsif rising_edge(clk_i) then + rdata_o <= (others => '0'); -- output zero if there is no memory access if (arbiter_req = '1') then -- pending request case ctrl_i.ir_funct3(1 downto 0) is when "00" => -- byte case mar(1 downto 0) is when "00" => -- byte 0 - rdata_o(7 downto 0) <= bus_rsp_i.data(7 downto 0); - rdata_o(XLEN-1 downto 8) <= (others => ((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(7))); -- sign-extend + rdata_o <= replicate_f((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(7), 24) & bus_rsp_i.data(7 downto 0); when "01" => -- byte 1 - rdata_o(7 downto 0) <= bus_rsp_i.data(15 downto 8); - rdata_o(XLEN-1 downto 8) <= (others => ((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(15))); -- sign-extend + rdata_o <= replicate_f((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(15), 24) & bus_rsp_i.data(15 downto 8); when "10" => -- byte 2 - rdata_o(7 downto 0) <= bus_rsp_i.data(23 downto 16); - rdata_o(XLEN-1 downto 8) <= (others => ((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(23))); -- sign-extend + rdata_o <= replicate_f((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(23), 24) & bus_rsp_i.data(23 downto 16); when others => -- byte 3 - rdata_o(7 downto 0) <= bus_rsp_i.data(31 downto 24); - rdata_o(XLEN-1 downto 8) <= (others => ((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(31))); -- sign-extend + rdata_o <= replicate_f((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(31), 24) & bus_rsp_i.data(31 downto 24); end case; when "01" => -- half-word if (mar(1) = '0') then -- low half-word - rdata_o(15 downto 0) <= bus_rsp_i.data(15 downto 0); - rdata_o(XLEN-1 downto 16) <= (others => ((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(15))); -- sign-extend + rdata_o <= replicate_f((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(15), 16) & bus_rsp_i.data(15 downto 0); else -- high half-word - rdata_o(15 downto 0) <= bus_rsp_i.data(31 downto 16); - rdata_o(XLEN-1 downto 16) <= (others => ((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(31))); -- sign-extend + rdata_o <= replicate_f((not ctrl_i.ir_funct3(2)) and bus_rsp_i.data(31), 16) & bus_rsp_i.data(31 downto 16); end if; when others => -- word - rdata_o(XLEN-1 downto 0) <= bus_rsp_i.data(XLEN-1 downto 0); + rdata_o <= bus_rsp_i.data; end case; - else - rdata_o <= (others => '0'); -- output zero if there is no memory access end if; end if; end process mem_di_reg; From a3fbfc6b287b8d82e2fcd76346462c3bf30f1ce9 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:41:23 +0200 Subject: [PATCH 06/10] [rtl] MTIME: logic optimization --- rtl/core/neorv32_mtime.vhd | 115 ++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git a/rtl/core/neorv32_mtime.vhd b/rtl/core/neorv32_mtime.vhd index 726604f1b..5c9b2464b 100644 --- a/rtl/core/neorv32_mtime.vhd +++ b/rtl/core/neorv32_mtime.vhd @@ -35,12 +35,14 @@ architecture neorv32_mtime_rtl of neorv32_mtime is signal mtimecmp_lo : std_ulogic_vector(31 downto 0); signal mtimecmp_hi : std_ulogic_vector(31 downto 0); signal mtime_lo : std_ulogic_vector(31 downto 0); - signal mtime_lo_nxt : std_ulogic_vector(32 downto 0); - signal mtime_lo_cry : std_ulogic_vector(00 downto 0); + signal mtime_lo_q : std_ulogic_vector(31 downto 0); signal mtime_hi : std_ulogic_vector(31 downto 0); + signal mtime_lo_inc : std_ulogic_vector(32 downto 0); + signal carry : std_ulogic_vector( 0 downto 0); + signal mtime_hi_inc : std_ulogic_vector(31 downto 0); - -- comparators -- - signal cmp_lo_ge, cmp_lo_ge_ff, cmp_hi_eq, cmp_hi_gt : std_ulogic; + -- comparator -- + signal cmp_lo_eq, cmp_lo_gt, cmp_lo_ge, cmp_hi_eq, cmp_hi_gt : std_ulogic; begin @@ -49,15 +51,12 @@ begin bus_access: process(rstn_i, clk_i) begin if (rstn_i = '0') then - mtimecmp_lo <= (others => '0'); - mtimecmp_hi <= (others => '0'); - mtime_we <= (others => '0'); - mtime_lo <= (others => '0'); - mtime_lo_cry <= (others => '0'); - mtime_hi <= (others => '0'); - bus_rsp_o <= rsp_terminate_c; + mtimecmp_lo <= (others => '0'); + mtimecmp_hi <= (others => '0'); + mtime_we <= (others => '0'); + bus_rsp_o <= rsp_terminate_c; elsif rising_edge(clk_i) then - -- mtimecmp -- + -- MTIMECMP write access -- if (bus_req_i.stb = '1') and (bus_req_i.rw = '1') and (bus_req_i.addr(3) = '1') then if (bus_req_i.addr(2) = '0') then mtimecmp_lo <= bus_req_i.data; @@ -65,28 +64,9 @@ begin mtimecmp_hi <= bus_req_i.data; end if; end if; - - -- mtime write access buffer -- + -- MTIME write access buffer -- mtime_we(0) <= bus_req_i.stb and bus_req_i.rw and (not bus_req_i.addr(3)) and (not bus_req_i.addr(2)); mtime_we(1) <= bus_req_i.stb and bus_req_i.rw and (not bus_req_i.addr(3)) and ( bus_req_i.addr(2)); - - -- mtime.low -- - if (mtime_we(0) = '1') then -- write access - mtime_lo <= bus_req_i.data; - else -- auto increment - mtime_lo <= mtime_lo_nxt(31 downto 0); - end if; - - -- low-to-high carry -- - mtime_lo_cry(0) <= mtime_lo_nxt(32); - - -- mtime.high -- - if (mtime_we(1) = '1') then -- write access - mtime_hi <= bus_req_i.data; - else -- auto increment (if mtime.low overflows) - mtime_hi <= std_ulogic_vector(unsigned(mtime_hi) + unsigned(mtime_lo_cry)); - end if; - -- read access -- bus_rsp_o.ack <= bus_req_i.stb; -- bus handshake bus_rsp_o.err <= '0'; -- no access errors @@ -102,30 +82,71 @@ begin end if; end process bus_access; - -- mtime.time_LO increment -- - mtime_lo_nxt <= std_ulogic_vector(unsigned('0' & mtime_lo) + 1); - -- system time output -- - time_o <= mtime_hi & mtime_lo; -- NOTE: low and high words are not synchronized here! + -- 64-Bit MTIME Counter ------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + counter: process(rstn_i, clk_i) + begin + if (rstn_i = '0') then + mtime_lo <= (others => '0'); + carry <= (others => '0'); + mtime_hi <= (others => '0'); + elsif rising_edge(clk_i) then + -- low-word -- + if (mtime_we(0) = '1') then -- write access + mtime_lo <= bus_req_i.data; -- write data is stable for at least one cycle after STB becomes low + carry(0) <= '0'; + else -- auto increment + mtime_lo <= mtime_lo_inc(31 downto 0); + carry(0) <= mtime_lo_inc(32); + end if; + -- high-word -- + if (mtime_we(1) = '1') then -- write access + mtime_hi <= bus_req_i.data; -- write data is stable for at least one cycle after STB becomes low + else -- auto increment + mtime_hi <= mtime_hi_inc; + end if; + end if; + end process counter; + + -- time increment -- + mtime_lo_inc <= std_ulogic_vector(unsigned('0' & mtime_lo) + 1); + mtime_hi_inc <= std_ulogic_vector(unsigned(mtime_hi) + unsigned(carry)); + + + -- Synchronize Output Words --------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + out_sync: process(rstn_i, clk_i) + begin + if (rstn_i = '0') then + mtime_lo_q <= (others => '0'); + elsif rising_edge(clk_i) then + mtime_lo_q <= mtime_lo; + end if; + end process out_sync; + + -- delay low-word by one cycle -- + time_o <= mtime_hi & mtime_lo_q; - -- Comparator ----------------------------------------------------------------------------- + -- Comparator (Interrupt Generator) ------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - cmp_sync: process(rstn_i, clk_i) + irq_gen: process(rstn_i, clk_i) begin if (rstn_i = '0') then - cmp_lo_ge_ff <= '0'; - irq_o <= '0'; + cmp_lo_ge <= '0'; + irq_o <= '0'; elsif rising_edge(clk_i) then - cmp_lo_ge_ff <= cmp_lo_ge; -- there is one cycle delay between low (earlier) and high (later) word - irq_o <= cmp_hi_gt or (cmp_hi_eq and cmp_lo_ge_ff); + cmp_lo_ge <= cmp_lo_gt or cmp_lo_eq; -- low word greater than or equal + irq_o <= cmp_hi_gt or (cmp_hi_eq and cmp_lo_ge); end if; - end process cmp_sync; + end process irq_gen; - -- sub-word comparators -- - cmp_lo_ge <= '1' when (unsigned(mtime_lo) >= unsigned(mtimecmp_lo)) else '0'; -- low-word: greater than or equal - cmp_hi_eq <= '1' when (unsigned(mtime_hi) = unsigned(mtimecmp_hi)) else '0'; -- high-word: equal - cmp_hi_gt <= '1' when (unsigned(mtime_hi) > unsigned(mtimecmp_hi)) else '0'; -- high-word: greater than + -- sub-word comparators; there is one cycle delay between low (earlier) and high (later) word -- + cmp_lo_eq <= '1' when (unsigned(mtime_lo) = unsigned(mtimecmp_lo)) else '0'; -- low-word equal + cmp_lo_gt <= '1' when (unsigned(mtime_lo) > unsigned(mtimecmp_lo)) else '0'; -- low-word greater than + cmp_hi_eq <= '1' when (unsigned(mtime_hi) = unsigned(mtimecmp_hi)) else '0'; -- high-word equal + cmp_hi_gt <= '1' when (unsigned(mtime_hi) > unsigned(mtimecmp_hi)) else '0'; -- high-word greater than end neorv32_mtime_rtl; From 910dfea32ff2e7ffeab8d6b94d3c4bc31eea3d79 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:41:40 +0200 Subject: [PATCH 07/10] [rtl] top: remove mtime delay --- rtl/core/neorv32_top.vhd | 137 +++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 77 deletions(-) diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index 64d47a177..fdc27edff 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -132,103 +132,103 @@ entity neorv32_top is ); port ( -- Global control -- - clk_i : in std_ulogic; -- global clock, rising edge - rstn_i : in std_ulogic; -- global reset, low-active, async + clk_i : in std_ulogic; -- global clock, rising edge + rstn_i : in std_ulogic; -- global reset, low-active, async -- JTAG on-chip debugger interface (available if ON_CHIP_DEBUGGER_EN = true) -- - jtag_tck_i : in std_ulogic := 'L'; -- serial clock - jtag_tdi_i : in std_ulogic := 'L'; -- serial data input - jtag_tdo_o : out std_ulogic; -- serial data output - jtag_tms_i : in std_ulogic := 'L'; -- mode select + jtag_tck_i : in std_ulogic := 'L'; -- serial clock + jtag_tdi_i : in std_ulogic := 'L'; -- serial data input + jtag_tdo_o : out std_ulogic; -- serial data output + jtag_tms_i : in std_ulogic := 'L'; -- mode select -- External bus interface (available if XBUS_EN = true) -- - xbus_adr_o : out std_ulogic_vector(31 downto 0); -- address - xbus_dat_o : out std_ulogic_vector(31 downto 0); -- write data - xbus_tag_o : out std_ulogic_vector(2 downto 0); -- access tag - xbus_we_o : out std_ulogic; -- read/write - xbus_sel_o : out std_ulogic_vector(3 downto 0); -- byte enable - xbus_stb_o : out std_ulogic; -- strobe - xbus_cyc_o : out std_ulogic; -- valid cycle + xbus_adr_o : out std_ulogic_vector(31 downto 0); -- address + xbus_dat_o : out std_ulogic_vector(31 downto 0); -- write data + xbus_tag_o : out std_ulogic_vector(2 downto 0); -- access tag + xbus_we_o : out std_ulogic; -- read/write + xbus_sel_o : out std_ulogic_vector(3 downto 0); -- byte enable + xbus_stb_o : out std_ulogic; -- strobe + xbus_cyc_o : out std_ulogic; -- valid cycle xbus_dat_i : in std_ulogic_vector(31 downto 0) := (others => 'L'); -- read data - xbus_ack_i : in std_ulogic := 'L'; -- transfer acknowledge - xbus_err_i : in std_ulogic := 'L'; -- transfer error + xbus_ack_i : in std_ulogic := 'L'; -- transfer acknowledge + xbus_err_i : in std_ulogic := 'L'; -- transfer error -- Stream Link Interface (available if IO_SLINK_EN = true) -- slink_rx_dat_i : in std_ulogic_vector(31 downto 0) := (others => 'L'); -- RX input data - slink_rx_src_i : in std_ulogic_vector(3 downto 0) := (others => 'L'); -- RX source routing information - slink_rx_val_i : in std_ulogic := 'L'; -- RX valid input - slink_rx_lst_i : in std_ulogic := 'L'; --RX last element of stream - slink_rx_rdy_o : out std_ulogic; -- RX ready to receive - slink_tx_dat_o : out std_ulogic_vector(31 downto 0); -- TX output data - slink_tx_dst_o : out std_ulogic_vector(3 downto 0); -- TX destination routing information - slink_tx_val_o : out std_ulogic; -- TX valid output - slink_tx_lst_o : out std_ulogic; -- TX last element of stream - slink_tx_rdy_i : in std_ulogic := 'L'; -- TX ready to send + slink_rx_src_i : in std_ulogic_vector(3 downto 0) := (others => 'L'); -- RX source routing information + slink_rx_val_i : in std_ulogic := 'L'; -- RX valid input + slink_rx_lst_i : in std_ulogic := 'L'; -- RX last element of stream + slink_rx_rdy_o : out std_ulogic; -- RX ready to receive + slink_tx_dat_o : out std_ulogic_vector(31 downto 0); -- TX output data + slink_tx_dst_o : out std_ulogic_vector(3 downto 0); -- TX destination routing information + slink_tx_val_o : out std_ulogic; -- TX valid output + slink_tx_lst_o : out std_ulogic; -- TX last element of stream + slink_tx_rdy_i : in std_ulogic := 'L'; -- TX ready to send -- XIP (execute in place via SPI) signals (available if XIP_EN = true) -- - xip_csn_o : out std_ulogic; -- chip-select, low-active - xip_clk_o : out std_ulogic; -- serial clock - xip_dat_i : in std_ulogic := 'L'; -- device data input - xip_dat_o : out std_ulogic; -- controller data output + xip_csn_o : out std_ulogic; -- chip-select, low-active + xip_clk_o : out std_ulogic; -- serial clock + xip_dat_i : in std_ulogic := 'L'; -- device data input + xip_dat_o : out std_ulogic; -- controller data output -- GPIO (available if IO_GPIO_NUM > 0) -- - gpio_o : out std_ulogic_vector(63 downto 0); -- parallel output + gpio_o : out std_ulogic_vector(63 downto 0); -- parallel output gpio_i : in std_ulogic_vector(63 downto 0) := (others => 'L'); -- parallel input -- primary UART0 (available if IO_UART0_EN = true) -- - uart0_txd_o : out std_ulogic; -- UART0 send data - uart0_rxd_i : in std_ulogic := 'L'; -- UART0 receive data - uart0_rts_o : out std_ulogic; -- HW flow control: UART0.RX ready to receive ("RTR"), low-active, optional - uart0_cts_i : in std_ulogic := 'L'; -- HW flow control: UART0.TX allowed to transmit, low-active, optional + uart0_txd_o : out std_ulogic; -- UART0 send data + uart0_rxd_i : in std_ulogic := 'L'; -- UART0 receive data + uart0_rts_o : out std_ulogic; -- HW flow control: UART0.RX ready to receive ("RTR"), low-active, optional + uart0_cts_i : in std_ulogic := 'L'; -- HW flow control: UART0.TX allowed to transmit, low-active, optional -- secondary UART1 (available if IO_UART1_EN = true) -- - uart1_txd_o : out std_ulogic; -- UART1 send data - uart1_rxd_i : in std_ulogic := 'L'; -- UART1 receive data - uart1_rts_o : out std_ulogic; -- HW flow control: UART1.RX ready to receive ("RTR"), low-active, optional - uart1_cts_i : in std_ulogic := 'L'; -- HW flow control: UART1.TX allowed to transmit, low-active, optional + uart1_txd_o : out std_ulogic; -- UART1 send data + uart1_rxd_i : in std_ulogic := 'L'; -- UART1 receive data + uart1_rts_o : out std_ulogic; -- HW flow control: UART1.RX ready to receive ("RTR"), low-active, optional + uart1_cts_i : in std_ulogic := 'L'; -- HW flow control: UART1.TX allowed to transmit, low-active, optional -- SPI (available if IO_SPI_EN = true) -- - spi_clk_o : out std_ulogic; -- SPI serial clock - spi_dat_o : out std_ulogic; -- controller data out, peripheral data in - spi_dat_i : in std_ulogic := 'L'; -- controller data in, peripheral data out - spi_csn_o : out std_ulogic_vector(7 downto 0); -- chip-select + spi_clk_o : out std_ulogic; -- SPI serial clock + spi_dat_o : out std_ulogic; -- controller data out, peripheral data in + spi_dat_i : in std_ulogic := 'L'; -- controller data in, peripheral data out + spi_csn_o : out std_ulogic_vector(7 downto 0); -- chip-select, low-active -- SDI (available if IO_SDI_EN = true) -- - sdi_clk_i : in std_ulogic := 'L'; -- SDI serial clock - sdi_dat_o : out std_ulogic; -- controller data out, peripheral data in - sdi_dat_i : in std_ulogic := 'L'; -- controller data in, peripheral data out - sdi_csn_i : in std_ulogic := 'H'; -- chip-select + sdi_clk_i : in std_ulogic := 'L'; -- SDI serial clock + sdi_dat_o : out std_ulogic; -- controller data out, peripheral data in + sdi_dat_i : in std_ulogic := 'L'; -- controller data in, peripheral data out + sdi_csn_i : in std_ulogic := 'H'; -- chip-select, low-active -- TWI (available if IO_TWI_EN = true) -- - twi_sda_i : in std_ulogic := 'H'; -- serial data line sense input - twi_sda_o : out std_ulogic; -- serial data line output (pull low only) - twi_scl_i : in std_ulogic := 'H'; -- serial clock line sense input - twi_scl_o : out std_ulogic; -- serial clock line output (pull low only) + twi_sda_i : in std_ulogic := 'H'; -- serial data line sense input + twi_sda_o : out std_ulogic; -- serial data line output (pull low only) + twi_scl_i : in std_ulogic := 'H'; -- serial clock line sense input + twi_scl_o : out std_ulogic; -- serial clock line output (pull low only) -- 1-Wire Interface (available if IO_ONEWIRE_EN = true) -- - onewire_i : in std_ulogic := 'H'; -- 1-wire bus sense input - onewire_o : out std_ulogic; -- 1-wire bus output (pull low only) + onewire_i : in std_ulogic := 'H'; -- 1-wire bus sense input + onewire_o : out std_ulogic; -- 1-wire bus output (pull low only) -- PWM (available if IO_PWM_NUM_CH > 0) -- - pwm_o : out std_ulogic_vector(11 downto 0); -- pwm channels + pwm_o : out std_ulogic_vector(11 downto 0); -- pwm channels -- Custom Functions Subsystem IO (available if IO_CFS_EN = true) -- cfs_in_i : in std_ulogic_vector(IO_CFS_IN_SIZE-1 downto 0) := (others => 'L'); -- custom CFS inputs conduit - cfs_out_o : out std_ulogic_vector(IO_CFS_OUT_SIZE-1 downto 0); -- custom CFS outputs conduit + cfs_out_o : out std_ulogic_vector(IO_CFS_OUT_SIZE-1 downto 0); -- custom CFS outputs conduit -- NeoPixel-compatible smart LED interface (available if IO_NEOLED_EN = true) -- - neoled_o : out std_ulogic; -- async serial data line + neoled_o : out std_ulogic; -- async serial data line -- Machine timer system time (available if IO_MTIME_EN = true) -- - mtime_time_o : out std_ulogic_vector(63 downto 0); -- current system time + mtime_time_o : out std_ulogic_vector(63 downto 0); -- current system time -- External platform interrupts (available if XIRQ_NUM_CH > 0) -- xirq_i : in std_ulogic_vector(31 downto 0) := (others => 'L'); -- IRQ channels -- CPU interrupts -- - mtime_irq_i : in std_ulogic := 'L'; -- machine timer interrupt, available if IO_MTIME_EN = false - msw_irq_i : in std_ulogic := 'L'; -- machine software interrupt - mext_irq_i : in std_ulogic := 'L' -- machine external interrupt + mtime_irq_i : in std_ulogic := 'L'; -- machine timer interrupt, available if IO_MTIME_EN = false + msw_irq_i : in std_ulogic := 'L'; -- machine software interrupt + mext_irq_i : in std_ulogic := 'L' -- machine external interrupt ); end neorv32_top; @@ -319,9 +319,6 @@ architecture neorv32_top_rtl of neorv32_top is signal cpu_firq : std_ulogic_vector(15 downto 0); signal mtime_irq : std_ulogic; - -- misc -- - signal mtime_time : std_ulogic_vector(63 downto 0); - begin -- ************************************************************************************************************************** @@ -1197,30 +1194,16 @@ begin rstn_i => rstn_sys, bus_req_i => iodev_req(IODEV_MTIME), bus_rsp_o => iodev_rsp(IODEV_MTIME), - time_o => mtime_time, + time_o => mtime_time_o, irq_o => mtime_irq ); - - -- synchronize system time output LO -- - mtime_sync: process(rstn_sys, clk_i) - begin - if (rstn_sys = '0') then - mtime_time_o(31 downto 0) <= (others => '0'); - elsif rising_edge(clk_i) then - mtime_time_o(31 downto 0) <= mtime_time(31 downto 0); - end if; - end process mtime_sync; - - -- system time output HI -- - mtime_time_o(63 downto 32) <= mtime_time(63 downto 32); - end generate; neorv32_mtime_inst_false: if not IO_MTIME_EN generate iodev_rsp(IODEV_MTIME) <= rsp_terminate_c; - mtime_irq <= mtime_irq_i; mtime_time_o <= (others => '0'); + mtime_irq <= mtime_irq_i; end generate; From 99dc8cab5ac90e61dc3c0434b8273f9f02beae28 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:53:40 +0200 Subject: [PATCH 08/10] [rtl] DMA: minor code cleanup --- rtl/core/neorv32_dma.vhd | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/rtl/core/neorv32_dma.vhd b/rtl/core/neorv32_dma.vhd index b1995169c..2300de073 100644 --- a/rtl/core/neorv32_dma.vhd +++ b/rtl/core/neorv32_dma.vhd @@ -339,18 +339,10 @@ begin align_buf <= align_end; else -- byte case engine.src_addr(1 downto 0) is - when "00" => -- byte 0 - align_buf(7 downto 0) <= align_end(7 downto 0); - align_buf(31 downto 8) <= (others => (config.qsel(1) and align_end(7))); -- sign extension - when "01" => -- byte 1 - align_buf(7 downto 0) <= align_end(15 downto 8); - align_buf(31 downto 8) <= (others => (config.qsel(1) and align_end(15))); -- sign extension - when "10" => -- byte 2 - align_buf(7 downto 0) <= align_end(23 downto 16); - align_buf(31 downto 8) <= (others => (config.qsel(1) and align_end(23))); -- sign extension - when others => -- byte 3 - align_buf(7 downto 0) <= align_end(31 downto 24); - align_buf(31 downto 8) <= (others => (config.qsel(1) and align_end(31))); -- sign extension + when "00" => align_buf <= replicate_f(config.qsel(1) and align_end(7), 24) & align_end(7 downto 0); + when "01" => align_buf <= replicate_f(config.qsel(1) and align_end(15), 24) & align_end(15 downto 8); + when "10" => align_buf <= replicate_f(config.qsel(1) and align_end(23), 24) & align_end(23 downto 16); + when others => align_buf <= replicate_f(config.qsel(1) and align_end(31), 24) & align_end(31 downto 24); end case; end if; end if; @@ -362,14 +354,11 @@ begin begin dma_req_o.ben <= (others => '0'); -- default if (config.qsel = qsel_b2b_c) then -- byte - dma_req_o.data(7 downto 0) <= align_buf(7 downto 0); - dma_req_o.data(15 downto 8) <= align_buf(7 downto 0); - dma_req_o.data(23 downto 16) <= align_buf(7 downto 0); - dma_req_o.data(31 downto 24) <= align_buf(7 downto 0); + dma_req_o.data <= align_buf(7 downto 0) & align_buf(7 downto 0) & align_buf(7 downto 0) & align_buf(7 downto 0); dma_req_o.ben(to_integer(unsigned(engine.dst_addr(1 downto 0)))) <= '1'; else -- word dma_req_o.data <= align_buf; - dma_req_o.ben <= "1111"; + dma_req_o.ben <= (others => '1'); end if; end process dst_align; From 49a3593519bf38040c3d1d51bd3434388c529a6c Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 19:54:33 +0200 Subject: [PATCH 09/10] [rtl] cleanup & optimize cpu control logic --- rtl/core/neorv32_cpu_control.vhd | 145 +++++++++++-------------------- 1 file changed, 49 insertions(+), 96 deletions(-) diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index db1fee9f6..6a95bcf52 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -276,7 +276,7 @@ architecture neorv32_cpu_control_rtl of neorv32_cpu_control is inc : cnt_inc_t; lo : cnt_dat_t; -- counter word low hi : cnt_dat_t; -- counter word high - nxt : cnt_nxt_t; -- increment, including carry bit + nxt : cnt_nxt_t; -- low-word increment including carry bit ovf : cnt_ovf_t; -- counter low-to-high-word overflow end record; signal cnt : cnt_t; @@ -426,31 +426,11 @@ begin -- Instruction Issue (decompress 16-bit instructions and assemble a 32-bit instruction word) -- **************************************************************************************************************************** - -- Compressed Instructions Decoder -------------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - neorv32_cpu_decompressor_inst_true: - if CPU_EXTENSION_RISCV_C generate - neorv32_cpu_decompressor_inst: entity neorv32.neorv32_cpu_decompressor - port map ( - instr16_i => issue_engine.ci_i16, - instr32_o => issue_engine.ci_i32 - ); - end generate; - - neorv32_cpu_decompressor_inst_false: - if not CPU_EXTENSION_RISCV_C generate - issue_engine.ci_i32 <= (others => '0'); - end generate; - - -- half-word select -- - issue_engine.ci_i16 <= ipb.rdata(0)(15 downto 0) when (issue_engine.align = '0') else ipb.rdata(1)(15 downto 0); - - - -- Issue Engine FSM (required if C extension is enabled) ---------------------------------- - -- ------------------------------------------------------------------------------------------- issue_engine_enabled: if CPU_EXTENSION_RISCV_C generate + -- Issue Engine FSM ----------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- issue_engine_fsm_sync: process(rstn_i, clk_i) begin if (rstn_i = '0') then @@ -459,7 +439,7 @@ begin if (fetch_engine.restart = '1') then issue_engine.align <= execute_engine.next_pc(1); -- branch to unaligned address? elsif (issue_engine.ack = '1') then - issue_engine.align <= (issue_engine.align and (not issue_engine.align_clr)) or issue_engine.align_set; -- "RS" flip-flop + issue_engine.align <= (issue_engine.align and (not issue_engine.align_clr)) or issue_engine.align_set; -- "rs flip-flop" end if; end if; end process issue_engine_fsm_sync; @@ -493,13 +473,27 @@ begin end if; end process issue_engine_fsm_comb; + + -- Compressed Instructions Decoder -------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + neorv32_cpu_decompressor_inst: entity neorv32.neorv32_cpu_decompressor + port map ( + instr16_i => issue_engine.ci_i16, + instr32_o => issue_engine.ci_i32 + ); + + -- half-word select -- + issue_engine.ci_i16 <= ipb.rdata(0)(15 downto 0) when (issue_engine.align = '0') else ipb.rdata(1)(15 downto 0); + end generate; -- /issue_engine_enabled - issue_engine_disabled: -- use IPB(0) status flags only + + -- issue engine disabled -- + issue_engine_disabled: if not CPU_EXTENSION_RISCV_C generate - issue_engine.valid <= (others => ipb.avail(0)); + issue_engine.valid <= (others => ipb.avail(0)); -- use IPB(0) status flags only issue_engine.data <= '0' & ipb.rdata(0)(16) & (ipb.rdata(1)(15 downto 0) & ipb.rdata(0)(15 downto 0)); - end generate; -- /issue_engine_disabled + end generate; -- update IPB FIFOs -- ipb.re(0) <= issue_engine.valid(0) and issue_engine.ack; @@ -517,39 +511,20 @@ begin if (rstn_i = '0') then imm_o <= (others => '0'); elsif rising_edge(clk_i) then - -- default I-immediate: ALU-immediate, load, jump-and-link with register -- - imm_o(XLEN-1 downto 11) <= (others => execute_engine.ir(31)); -- sign extension - imm_o(10 downto 1) <= execute_engine.ir(30 downto 21); - imm_o(0) <= execute_engine.ir(20); - -- + imm_o <= (others => '0'); case decode_aux.opcode is - when opcode_store_c => -- S-immediate: store - imm_o(XLEN-1 downto 11) <= (others => execute_engine.ir(31)); -- sign extension - imm_o(10 downto 5) <= execute_engine.ir(30 downto 25); - imm_o(4 downto 0) <= execute_engine.ir(11 downto 7); - when opcode_branch_c => -- B-immediate: conditional branch - imm_o(XLEN-1 downto 12) <= (others => execute_engine.ir(31)); -- sign extension - imm_o(11) <= execute_engine.ir(7); - imm_o(10 downto 5) <= execute_engine.ir(30 downto 25); - imm_o(4 downto 1) <= execute_engine.ir(11 downto 8); - imm_o(0) <= '0'; - when opcode_lui_c | opcode_auipc_c => -- U-immediate: lui, auipc - imm_o(XLEN-1 downto 12) <= execute_engine.ir(31 downto 12); - imm_o(11 downto 0) <= (others => '0'); - when opcode_jal_c => -- J-immediate: unconditional jump - imm_o(XLEN-1 downto 20) <= (others => execute_engine.ir(31)); -- sign extension - imm_o(19 downto 12) <= execute_engine.ir(19 downto 12); - imm_o(11) <= execute_engine.ir(20); - imm_o(10 downto 1) <= execute_engine.ir(30 downto 21); - imm_o(0) <= '0'; + when opcode_store_c => -- S-immediate + imm_o <= replicate_f(execute_engine.ir(31), 21) & execute_engine.ir(30 downto 25) & execute_engine.ir(11 downto 7); + when opcode_branch_c => -- B-immediate + imm_o <= replicate_f(execute_engine.ir(31), 20) & execute_engine.ir(7) & execute_engine.ir(30 downto 25) & execute_engine.ir(11 downto 8) & '0'; + when opcode_lui_c | opcode_auipc_c => -- U-immediate + imm_o <= execute_engine.ir(31 downto 12) & x"000"; + when opcode_jal_c => -- J-immediate + imm_o <= replicate_f(execute_engine.ir(31), 12) & execute_engine.ir(19 downto 12) & execute_engine.ir(20) & execute_engine.ir(30 downto 21) & '0'; when opcode_amo_c => -- atomic memory access - if CPU_EXTENSION_RISCV_A then - imm_o <= (others => '0'); - else - NULL; - end if; - when others => - NULL; + imm_o <= (others => '0'); + when others => -- I-immediate + imm_o <= replicate_f(execute_engine.ir(31), 21) & execute_engine.ir(30 downto 21) & execute_engine.ir(20); end case; end if; end process imm_gen; @@ -1028,15 +1003,11 @@ begin -- ------------------------------------------------------------------------------------------- -- register file -- - ctrl_o.rf_wb_en <= ctrl.rf_wb_en and -- inhibit write-back only for rd-updating exceptions that must not commit - (not trap_ctrl.exc_buf(exc_illegal_c)) and - (not trap_ctrl.exc_buf(exc_ialign_c)) and (not trap_ctrl.exc_buf(exc_salign_c)) and (not trap_ctrl.exc_buf(exc_lalign_c)) and - (not trap_ctrl.exc_buf(exc_iaccess_c)) and (not trap_ctrl.exc_buf(exc_saccess_c)) and (not trap_ctrl.exc_buf(exc_laccess_c)); + ctrl_o.rf_wb_en <= ctrl.rf_wb_en and (not or_reduce_f(trap_ctrl.exc_buf));-- inhibit write-back if exception ctrl_o.rf_rs1 <= execute_engine.ir(instr_rs1_msb_c downto instr_rs1_lsb_c); ctrl_o.rf_rs2 <= execute_engine.ir(instr_rs2_msb_c downto instr_rs2_lsb_c); ctrl_o.rf_rd <= execute_engine.ir(instr_rd_msb_c downto instr_rd_lsb_c); ctrl_o.rf_zero_we <= ctrl.rf_zero_we; - -- alu -- ctrl_o.alu_op <= ctrl.alu_op; ctrl_o.alu_sub <= ctrl.alu_sub; @@ -1044,19 +1015,16 @@ begin ctrl_o.alu_opb_mux <= ctrl.alu_opb_mux; ctrl_o.alu_unsigned <= ctrl.alu_unsigned; ctrl_o.alu_cp_trig <= ctrl.alu_cp_trig; - -- load/store unit -- ctrl_o.lsu_req <= ctrl.lsu_req; ctrl_o.lsu_rw <= ctrl.lsu_rw; ctrl_o.lsu_mo_we <= '1' when (execute_engine.state = MEM_REQ) else '0'; -- write memory output registers (data & address) ctrl_o.lsu_fence <= ctrl.lsu_fence; -- fence(.i) ctrl_o.lsu_priv <= csr.mstatus_mpp when (csr.mstatus_mprv = '1') else csr.privilege_eff; -- effective privilege level for loads/stores in M-mode - -- instruction word bit fields -- ctrl_o.ir_funct3 <= execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c); ctrl_o.ir_funct12 <= execute_engine.ir(instr_funct12_msb_c downto instr_funct12_lsb_c); ctrl_o.ir_opcode <= execute_engine.ir(instr_opcode_msb_c downto instr_opcode_lsb_c); - -- cpu status -- ctrl_o.cpu_priv <= csr.privilege_eff; ctrl_o.cpu_sleep <= sleep_mode; @@ -1101,10 +1069,9 @@ begin csr_valid(2) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_Zfinx); -- available if FPU implemented -- machine trap setup/handling, environment/information registers, etc. -- - when csr_mstatus_c | csr_mstatush_c | csr_misa_c | csr_mie_c | csr_mtvec_c | - csr_mscratch_c | csr_mepc_c | csr_mcause_c | csr_mip_c | csr_mtval_c | - csr_mtinst_c | csr_mcountinhibit_c | csr_mvendorid_c | csr_marchid_c | csr_mimpid_c | - csr_mhartid_c | csr_mconfigptr_c | csr_mxisa_c => + when csr_mstatus_c | csr_mstatush_c | csr_misa_c | csr_mie_c | csr_mtvec_c | csr_mscratch_c | + csr_mepc_c | csr_mcause_c | csr_mip_c | csr_mtval_c | csr_mtinst_c | csr_mcountinhibit_c | + csr_mvendorid_c | csr_marchid_c | csr_mimpid_c | csr_mhartid_c | csr_mconfigptr_c | csr_mxisa_c => csr_valid(2) <= '1'; -- always implemented -- machine-controlled user-mode CSRs -- @@ -1138,8 +1105,7 @@ begin csr_valid(2) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_Zihpm); -- available if Zihpm implemented -- counter and timer CSRs -- - when csr_cycle_c | csr_mcycle_c | csr_instret_c | csr_minstret_c | - csr_cycleh_c | csr_mcycleh_c | csr_instreth_c | csr_minstreth_c => + when csr_cycle_c | csr_mcycle_c | csr_instret_c | csr_minstret_c | csr_cycleh_c | csr_mcycleh_c | csr_instreth_c | csr_minstreth_c => csr_valid(2) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_Zicntr); -- available if Zicntr implemented -- debug-mode CSRs -- @@ -1271,7 +1237,7 @@ begin when opcode_fence_c => case execute_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is - when funct3_fence_c | funct3_fencei_c => illegal_cmd <= '0'; -- fence[.i] + when funct3_fence_c | funct3_fencei_c => illegal_cmd <= '0'; when others => illegal_cmd <= '1'; end case; @@ -1359,13 +1325,8 @@ begin end if; -- debug-mode entry -- - if CPU_EXTENSION_RISCV_Sdext then - trap_ctrl.exc_buf(exc_db_break_c) <= (trap_ctrl.exc_buf(exc_db_break_c) or debug_ctrl.trig_break) and (not trap_ctrl.env_enter); - trap_ctrl.exc_buf(exc_db_hw_c) <= (trap_ctrl.exc_buf(exc_db_hw_c) or debug_ctrl.trig_hw) and (not trap_ctrl.env_enter); - else - trap_ctrl.exc_buf(exc_db_break_c) <= '0'; - trap_ctrl.exc_buf(exc_db_hw_c) <= '0'; - end if; + trap_ctrl.exc_buf(exc_db_break_c) <= (trap_ctrl.exc_buf(exc_db_break_c) or debug_ctrl.trig_break) and (not trap_ctrl.env_enter); + trap_ctrl.exc_buf(exc_db_hw_c) <= (trap_ctrl.exc_buf(exc_db_hw_c) or debug_ctrl.trig_hw) and (not trap_ctrl.env_enter); end if; end process exception_buffer; @@ -1396,7 +1357,7 @@ begin trap_ctrl.irq_pnd(irq_db_halt_c) <= '0'; -- unused trap_ctrl.irq_pnd(irq_db_step_c) <= '0'; -- unused - -- Interrupt-Masking Buffer --------------------------------------------- + -- Interrupt Buffer ----------------------------------------------------- -- Masking of interrupt request lines. Additionally, this buffer ensures -- that an active interrupt request line stays active (even when -- disabled via MIE) if the trap environment is already starting. @@ -1413,13 +1374,8 @@ begin end loop; -- debug-mode entry -- - if CPU_EXTENSION_RISCV_Sdext then - trap_ctrl.irq_buf(irq_db_halt_c) <= debug_ctrl.trig_halt or (trap_ctrl.env_pending and trap_ctrl.irq_buf(irq_db_halt_c)); - trap_ctrl.irq_buf(irq_db_step_c) <= debug_ctrl.trig_step or (trap_ctrl.env_pending and trap_ctrl.irq_buf(irq_db_step_c)); - else - trap_ctrl.irq_buf(irq_db_halt_c) <= '0'; - trap_ctrl.irq_buf(irq_db_step_c) <= '0'; - end if; + trap_ctrl.irq_buf(irq_db_halt_c) <= debug_ctrl.trig_halt or (trap_ctrl.env_pending and trap_ctrl.irq_buf(irq_db_halt_c)); + trap_ctrl.irq_buf(irq_db_step_c) <= debug_ctrl.trig_step or (trap_ctrl.env_pending and trap_ctrl.irq_buf(irq_db_step_c)); end if; end process interrupt_buffer; @@ -1785,12 +1741,9 @@ begin -- DEBUG MODE entry - no CSR update when already in debug-mode! -- if CPU_EXTENSION_RISCV_Sdext and (trap_ctrl.cause(5) = '1') and (debug_ctrl.running = '0') then - -- trap cause -- - csr.dcsr_cause <= trap_ctrl.cause(2 downto 0); -- why did we enter debug mode? - -- current privilege mode when debug mode was entered -- - csr.dcsr_prv <= csr.privilege; - -- trap PC -- - csr.dpc <= trap_ctrl.epc(XLEN-1 downto 1) & '0'; + csr.dcsr_cause <= trap_ctrl.cause(2 downto 0); -- trap cause + csr.dcsr_prv <= csr.privilege; -- current privilege mode when debug mode was entered + csr.dpc <= trap_ctrl.epc(XLEN-1 downto 1) & '0'; -- trap PC end if; -- ******************************************************************************** @@ -2053,7 +2006,7 @@ begin when csr_tinfo_c => -- trigger information if CPU_EXTENSION_RISCV_Sdtrig then csr_rdata(31 downto 24) <= x"01"; -- Sdtrig ISA spec. version 1.0 - csr_rdata(15 downto 00) <= x"0006"; -- mcontrol6 type trigger only + csr_rdata(15 downto 0) <= x"0006"; -- mcontrol6 type trigger only end if; -- -------------------------------------------------------------------- @@ -2198,7 +2151,7 @@ begin hpmevent_write: process(csr) begin hpmevent_we <= (others => '0'); - -- [NOTE] no need to check bit 4 of the address as it's always zero (checked by illegal CSR logic) + -- [NOTE] no need to check bit 4 of the address as it is always zero (checked by illegal CSR logic) if (csr.addr(11 downto 5) = csr_mcountinhibit_c(11 downto 5)) and (csr.we = '1') then hpmevent_we(to_integer(unsigned(csr.addr(3 downto 0)))) <= '1'; end if; From 9293c7f2f738c23b744b66415bca3b1de79cf758 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Fri, 5 Jul 2024 19:55:17 +0200 Subject: [PATCH 10/10] [changelog] add v1.10.1.1 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bb79c50b..0c60581ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Ticket | |:----:|:-------:|:--------|:------:| +| 05.07.2024 | 1.10.1.1 | minor rtl cleanups and optimizations | [#941](https://github.com/stnolting/neorv32/pull/941) | | 04.07.2024 | [**:rocket:1.10.1**](https://github.com/stnolting/neorv32/releases/tag/v1.10.1) | **New release** | | | 04.07.2024 | 1.10.0.10 | :warning: rework GPTMRM and remove capture mode | [#939](https://github.com/stnolting/neorv32/pull/939) | | 03.07.2024 | 1.10.0.9 | :warning: remove `AMO_RVS_GRANULARITY` generic, reservation set granularity is now fixed to 4 bytes | [#938](https://github.com/stnolting/neorv32/pull/938) |