Skip to content

Commit

Permalink
rtl logic optimization and cleanups (#880)
Browse files Browse the repository at this point in the history
  • Loading branch information
stnolting committed Apr 21, 2024
2 parents 183fa56 + 47c94fa commit 19363d3
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 215 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12

| Date | Version | Comment | Link |
|:----:|:-------:|:--------|:----:|
| 21.04.2024 | 1.9.8.5 | rtl cleanups and (area) optimizations | [#880](https://github.com/stnolting/neorv32/pull/880) |
| 16.04.2024 | 1.9.8.4 | :warning: use a 4-bit FIRQ select instead of a 16-bit FIRQ mask for DMA auto-trigger configuration | [#877](https://github.com/stnolting/neorv32/pull/877) |
| 15.04.2024 | 1.9.8.3 | :warning: simplify XBUS gateway logic and configuration generics; only "pipelined Wishbone" protocol is supported now | [#876](https://github.com/stnolting/neorv32/pull/876) |
| 14.04.2024 | 1.9.8.2 | :warning: rename SLINK data interface registers; minor CPU control logic/area optimizations | [#874](https://github.com/stnolting/neorv32/pull/874) |
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_cfs.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ begin
cfs_reg_rd(0) <= bin_to_gray_f(cfs_reg_wr(0)); -- convert binary to gray code
cfs_reg_rd(1) <= gray_to_bin_f(cfs_reg_wr(1)); -- convert gray to binary code
cfs_reg_rd(2) <= bit_rev_f(cfs_reg_wr(2)); -- bit reversal
cfs_reg_rd(3) <= bswap32_f(cfs_reg_wr(3)); -- byte swap (endianness conversion)
cfs_reg_rd(3) <= bswap_f(cfs_reg_wr(3)); -- byte swap (endianness conversion)


end neorv32_cfs_rtl;
314 changes: 137 additions & 177 deletions rtl/core/neorv32_cpu_cp_bitmanip.vhd

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions rtl/core/neorv32_cpu_cp_shifter.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ architecture neorv32_cpu_cp_shifter_rtl of neorv32_cpu_cp_shifter is
-- barrel shifter --
type bs_level_t is array (index_size_f(XLEN) downto 0) of std_ulogic_vector(XLEN-1 downto 0);
signal bs_level : bs_level_t;
signal bs_mask : std_ulogic;
signal bs_sign : std_ulogic;
signal bs_start : std_ulogic;
signal bs_result : std_ulogic_vector(XLEN-1 downto 0);

Expand Down Expand Up @@ -109,14 +109,14 @@ begin
if FAST_SHIFT_EN generate

-- input layer: convert left shifts to right shifts by bit-reversal --
bs_level(index_size_f(XLEN)) <= bit_rev_f(rs1_i) when (ctrl_i.ir_funct3(2) = '0') else rs1_i;
bs_mask <= rs1_i(XLEN-1) and ctrl_i.ir_funct12(10); -- MSBs mask for arithmetic/logic shifts
bs_level(0) <= bit_rev_f(rs1_i) when (ctrl_i.ir_funct3(2) = '0') else rs1_i;
bs_sign <= rs1_i(XLEN-1) and ctrl_i.ir_funct12(10); -- sign extension for arithmetic shifts

-- shifter layers: right-shifts only --
-- mux layers: right-shifts only --
barrel_shifter_core:
for i in index_size_f(XLEN)-1 downto 0 generate
bs_level(i)(XLEN-1 downto XLEN-(2**i)) <= (others => bs_mask) when (shamt_i(i) = '1') else bs_level(i+1)(XLEN-1 downto XLEN-(2**i));
bs_level(i)((XLEN-(2**i))-1 downto 0) <= bs_level(i+1)(XLEN-1 downto 2**i) when (shamt_i(i) = '1') else bs_level(i+1)((XLEN-(2**i))-1 downto 0);
for i in 0 to index_size_f(XLEN)-1 generate
bs_level(i+1)(XLEN-1 downto XLEN-(2**i)) <= (others => bs_sign) when (shamt_i(i) = '1') else bs_level(i)(XLEN-1 downto XLEN-(2**i));
bs_level(i+1)((XLEN-(2**i))-1 downto 0) <= bs_level(i)(XLEN-1 downto 2**i) when (shamt_i(i) = '1') else bs_level(i)((XLEN-(2**i))-1 downto 0);
end generate;

-- pipeline register --
Expand All @@ -127,7 +127,7 @@ begin
bs_result <= (others => '0');
elsif rising_edge(clk_i) then -- this register stage can be moved by the register balancing
bs_start <= start_i;
bs_result <= bs_level(0);
bs_result <= bs_level(index_size_f(XLEN));
end if;
end process barrel_shifter_buf;

Expand Down
14 changes: 1 addition & 13 deletions rtl/core/neorv32_cpu_regfile.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,8 @@ architecture neorv32_cpu_regfile_rtl of neorv32_cpu_regfile is

-- access --
signal rf_we : std_ulogic; -- write enable
signal rf_we_sel : std_ulogic_vector((2**addr_bits_c)-1 downto 0); -- one-hot write enable
signal rd_zero : std_ulogic; -- writing to x0?
signal opa_addr : std_ulogic_vector(4 downto 0); -- rs1/rd address
signal rd_addr : std_ulogic_vector(4 downto 0); -- rd address
signal rs3_addr : std_ulogic_vector(4 downto 0); -- rs3 address
signal rs4_addr : std_ulogic_vector(4 downto 0); -- rs4 address

Expand Down Expand Up @@ -101,16 +99,6 @@ begin
register_file_asic:
if RST_EN generate

-- "write" to x0 if no write access --
rd_addr <= ctrl_i.rf_rd(addr_bits_c-1 downto 0) when (ctrl_i.rf_wb_en = '1') else (others => '0');

-- write enable decoder --
we_decode: process(rd_addr)
begin
rf_we_sel <= (others => '0');
rf_we_sel(to_integer(unsigned(rd_addr(addr_bits_c-1 downto 0)))) <= '1';
end process we_decode;

-- individual registers --
reg_gen:
for i in 1 to (2**addr_bits_c)-1 generate
Expand All @@ -119,7 +107,7 @@ begin
if (rstn_i = '0') then
reg_file(i) <= (others => '0');
elsif rising_edge(clk_i) then
if (rf_we_sel(i) = '1') then
if (unsigned(ctrl_i.rf_rd(addr_bits_c-1 downto 0)) = to_unsigned(i, addr_bits_c)) and (ctrl_i.rf_wb_en = '1') then
reg_file(i) <= rd_i;
end if;
end if;
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_dma.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ begin
-- -------------------------------------------------------------------------------------------

-- endianness conversion --
align_end <= dma_rsp_i.data when (config.endian = '0') else bswap32_f(dma_rsp_i.data);
align_end <= dma_rsp_i.data when (config.endian = '0') else bswap_f(dma_rsp_i.data);

-- source data alignment --
src_align: process(rstn_i, clk_i)
Expand Down
29 changes: 15 additions & 14 deletions rtl/core/neorv32_package.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ package neorv32_package is

-- Architecture Constants -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090804"; -- hardware version
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090805"; -- hardware version
constant archid_c : natural := 19; -- official RISC-V architecture ID
constant XLEN : natural := 32; -- native data path width

Expand Down Expand Up @@ -694,7 +694,7 @@ package neorv32_package is
function to_hstring32_f(input : std_ulogic_vector(31 downto 0)) return string;
function bit_rev_f(input : std_ulogic_vector) return std_ulogic_vector;
function is_power_of_two_f(input : natural) return boolean;
function bswap32_f(input : std_ulogic_vector) return std_ulogic_vector;
function bswap_f(input : std_ulogic_vector) return std_ulogic_vector;
function popcount_f(input : std_ulogic_vector) return natural;
function leading_zeros_f(input : std_ulogic_vector) return natural;
impure function mem32_init_f(init : mem32_t; depth : natural) return mem32_t;
Expand Down Expand Up @@ -1034,7 +1034,7 @@ package body neorv32_package is
end case;
end function su_undefined_f;

-- Convert std_ulogic_vector to lowercase HEX char ----------------------------------------
-- Convert std_ulogic_vector to lowercase hex char ----------------------------------------
-- -------------------------------------------------------------------------------------------
function to_hexchar_f(input : std_ulogic_vector(3 downto 0)) return character is
variable hex_v : string(1 to 16);
Expand Down Expand Up @@ -1073,41 +1073,42 @@ package body neorv32_package is
-- Test if input number is a power of two -------------------------------------------------
-- -------------------------------------------------------------------------------------------
function is_power_of_two_f(input : natural) return boolean is
variable tmp : unsigned(31 downto 0);
variable tmp_v : unsigned(31 downto 0);
begin
if (input = 0) then
return false;
elsif (input = 1) then
return true;
else
tmp := to_unsigned(input, 32);
if ((tmp and (tmp - 1)) = 0) then
tmp_v := to_unsigned(input, 32);
if ((tmp_v and (tmp_v - 1)) = 0) then
return true;
else
return false;
end if;
end if;
end function is_power_of_two_f;

-- Swap all bytes of a 32-bit word (endianness conversion) --------------------------------
-- Swap all bytes of an N*8-bit word (endianness conversion) ------------------------------
-- -------------------------------------------------------------------------------------------
function bswap32_f(input : std_ulogic_vector) return std_ulogic_vector is
function bswap_f(input : std_ulogic_vector) return std_ulogic_vector is
variable output_v : std_ulogic_vector(input'range);
variable j : natural range 0 to input'length/8;
begin
output_v(07 downto 00) := input(31 downto 24);
output_v(15 downto 08) := input(23 downto 16);
output_v(23 downto 16) := input(15 downto 08);
output_v(31 downto 24) := input(07 downto 00);
for i in 0 to (input'length/8)-1 loop
j := ((input'length/8) - 1) - i;
output_v(i*8+7 downto i*8+0) := input(j*8+7 downto j*8+0);
end loop;
return output_v;
end function bswap32_f;
end function bswap_f;

-- Population count (number of set bits) --------------------------------------------------
-- -------------------------------------------------------------------------------------------
function popcount_f(input : std_ulogic_vector) return natural is
variable cnt_v : natural range 0 to input'length;
begin
cnt_v := 0;
for i in input'length-1 downto 0 loop
for i in 0 to input'length-1 loop
if (input(i) = '1') then
cnt_v := cnt_v + 1;
end if;
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_xip.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ begin

when S_BUSY => -- wait for PHY to complete operation
-- ------------------------------------------------------------
xip_rsp_o.data <= bswap32_f(phy_if.rdata); -- convert incrementing byte-read to little-endian
xip_rsp_o.data <= bswap_f(phy_if.rdata); -- convert incrementing byte-read to little-endian
if (phy_if.busy = '0') then
xip_rsp_o.ack <= '1';
arbiter.state_nxt <= S_IDLE;
Expand Down

0 comments on commit 19363d3

Please sign in to comment.