Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rtl logic optimization and cleanups #880

Merged
merged 5 commits into from
Apr 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12

| Date | Version | Comment | Link |
|:----:|:-------:|:--------|:----:|
| 21.04.2024 | 1.9.8.5 | rtl cleanups and (area) optimizations | [#880](https://github.com/stnolting/neorv32/pull/880) |
| 16.04.2024 | 1.9.8.4 | :warning: use a 4-bit FIRQ select instead of a 16-bit FIRQ mask for DMA auto-trigger configuration | [#877](https://github.com/stnolting/neorv32/pull/877) |
| 15.04.2024 | 1.9.8.3 | :warning: simplify XBUS gateway logic and configuration generics; only "pipelined Wishbone" protocol is supported now | [#876](https://github.com/stnolting/neorv32/pull/876) |
| 14.04.2024 | 1.9.8.2 | :warning: rename SLINK data interface registers; minor CPU control logic/area optimizations | [#874](https://github.com/stnolting/neorv32/pull/874) |
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_cfs.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ begin
cfs_reg_rd(0) <= bin_to_gray_f(cfs_reg_wr(0)); -- convert binary to gray code
cfs_reg_rd(1) <= gray_to_bin_f(cfs_reg_wr(1)); -- convert gray to binary code
cfs_reg_rd(2) <= bit_rev_f(cfs_reg_wr(2)); -- bit reversal
cfs_reg_rd(3) <= bswap32_f(cfs_reg_wr(3)); -- byte swap (endianness conversion)
cfs_reg_rd(3) <= bswap_f(cfs_reg_wr(3)); -- byte swap (endianness conversion)


end neorv32_cfs_rtl;
314 changes: 137 additions & 177 deletions rtl/core/neorv32_cpu_cp_bitmanip.vhd

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions rtl/core/neorv32_cpu_cp_shifter.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ architecture neorv32_cpu_cp_shifter_rtl of neorv32_cpu_cp_shifter is
-- barrel shifter --
type bs_level_t is array (index_size_f(XLEN) downto 0) of std_ulogic_vector(XLEN-1 downto 0);
signal bs_level : bs_level_t;
signal bs_mask : std_ulogic;
signal bs_sign : std_ulogic;
signal bs_start : std_ulogic;
signal bs_result : std_ulogic_vector(XLEN-1 downto 0);

Expand Down Expand Up @@ -109,14 +109,14 @@ begin
if FAST_SHIFT_EN generate

-- input layer: convert left shifts to right shifts by bit-reversal --
bs_level(index_size_f(XLEN)) <= bit_rev_f(rs1_i) when (ctrl_i.ir_funct3(2) = '0') else rs1_i;
bs_mask <= rs1_i(XLEN-1) and ctrl_i.ir_funct12(10); -- MSBs mask for arithmetic/logic shifts
bs_level(0) <= bit_rev_f(rs1_i) when (ctrl_i.ir_funct3(2) = '0') else rs1_i;
bs_sign <= rs1_i(XLEN-1) and ctrl_i.ir_funct12(10); -- sign extension for arithmetic shifts

-- shifter layers: right-shifts only --
-- mux layers: right-shifts only --
barrel_shifter_core:
for i in index_size_f(XLEN)-1 downto 0 generate
bs_level(i)(XLEN-1 downto XLEN-(2**i)) <= (others => bs_mask) when (shamt_i(i) = '1') else bs_level(i+1)(XLEN-1 downto XLEN-(2**i));
bs_level(i)((XLEN-(2**i))-1 downto 0) <= bs_level(i+1)(XLEN-1 downto 2**i) when (shamt_i(i) = '1') else bs_level(i+1)((XLEN-(2**i))-1 downto 0);
for i in 0 to index_size_f(XLEN)-1 generate
bs_level(i+1)(XLEN-1 downto XLEN-(2**i)) <= (others => bs_sign) when (shamt_i(i) = '1') else bs_level(i)(XLEN-1 downto XLEN-(2**i));
bs_level(i+1)((XLEN-(2**i))-1 downto 0) <= bs_level(i)(XLEN-1 downto 2**i) when (shamt_i(i) = '1') else bs_level(i)((XLEN-(2**i))-1 downto 0);
end generate;

-- pipeline register --
Expand All @@ -127,7 +127,7 @@ begin
bs_result <= (others => '0');
elsif rising_edge(clk_i) then -- this register stage can be moved by register balancing
bs_start <= start_i;
bs_result <= bs_level(0);
bs_result <= bs_level(index_size_f(XLEN));
end if;
end process barrel_shifter_buf;

Expand Down
14 changes: 1 addition & 13 deletions rtl/core/neorv32_cpu_regfile.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,8 @@ architecture neorv32_cpu_regfile_rtl of neorv32_cpu_regfile is

-- access --
signal rf_we : std_ulogic; -- write enable
signal rf_we_sel : std_ulogic_vector((2**addr_bits_c)-1 downto 0); -- one-hot write enable
signal rd_zero : std_ulogic; -- writing to x0?
signal opa_addr : std_ulogic_vector(4 downto 0); -- rs1/rd address
signal rd_addr : std_ulogic_vector(4 downto 0); -- rd address
signal rs3_addr : std_ulogic_vector(4 downto 0); -- rs3 address
signal rs4_addr : std_ulogic_vector(4 downto 0); -- rs4 address

Expand Down Expand Up @@ -101,16 +99,6 @@ begin
register_file_asic:
if RST_EN generate

-- "write" to x0 if no write access --
rd_addr <= ctrl_i.rf_rd(addr_bits_c-1 downto 0) when (ctrl_i.rf_wb_en = '1') else (others => '0');

-- write enable decoder --
we_decode: process(rd_addr)
begin
rf_we_sel <= (others => '0');
rf_we_sel(to_integer(unsigned(rd_addr(addr_bits_c-1 downto 0)))) <= '1';
end process we_decode;

-- individual registers --
reg_gen:
for i in 1 to (2**addr_bits_c)-1 generate
Expand All @@ -119,7 +107,7 @@ begin
if (rstn_i = '0') then
reg_file(i) <= (others => '0');
elsif rising_edge(clk_i) then
if (rf_we_sel(i) = '1') then
if (unsigned(ctrl_i.rf_rd(addr_bits_c-1 downto 0)) = to_unsigned(i, addr_bits_c)) and (ctrl_i.rf_wb_en = '1') then
reg_file(i) <= rd_i;
end if;
end if;
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_dma.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ begin
-- -------------------------------------------------------------------------------------------

-- endianness conversion --
align_end <= dma_rsp_i.data when (config.endian = '0') else bswap32_f(dma_rsp_i.data);
align_end <= dma_rsp_i.data when (config.endian = '0') else bswap_f(dma_rsp_i.data);

-- source data alignment --
src_align: process(rstn_i, clk_i)
Expand Down
29 changes: 15 additions & 14 deletions rtl/core/neorv32_package.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ package neorv32_package is

-- Architecture Constants -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090804"; -- hardware version
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090805"; -- hardware version
constant archid_c : natural := 19; -- official RISC-V architecture ID
constant XLEN : natural := 32; -- native data path width

Expand Down Expand Up @@ -694,7 +694,7 @@ package neorv32_package is
function to_hstring32_f(input : std_ulogic_vector(31 downto 0)) return string;
function bit_rev_f(input : std_ulogic_vector) return std_ulogic_vector;
function is_power_of_two_f(input : natural) return boolean;
function bswap32_f(input : std_ulogic_vector) return std_ulogic_vector;
function bswap_f(input : std_ulogic_vector) return std_ulogic_vector;
function popcount_f(input : std_ulogic_vector) return natural;
function leading_zeros_f(input : std_ulogic_vector) return natural;
impure function mem32_init_f(init : mem32_t; depth : natural) return mem32_t;
Expand Down Expand Up @@ -1034,7 +1034,7 @@ package body neorv32_package is
end case;
end function su_undefined_f;

-- Convert std_ulogic_vector to lowercase HEX char ----------------------------------------
-- Convert std_ulogic_vector to lowercase hex char ----------------------------------------
-- -------------------------------------------------------------------------------------------
function to_hexchar_f(input : std_ulogic_vector(3 downto 0)) return character is
variable hex_v : string(1 to 16);
Expand Down Expand Up @@ -1073,41 +1073,42 @@ package body neorv32_package is
-- Test if input number is a power of two -------------------------------------------------
-- -------------------------------------------------------------------------------------------
function is_power_of_two_f(input : natural) return boolean is
variable tmp : unsigned(31 downto 0);
variable tmp_v : unsigned(31 downto 0);
begin
if (input = 0) then
return false;
elsif (input = 1) then
return true;
else
tmp := to_unsigned(input, 32);
if ((tmp and (tmp - 1)) = 0) then
tmp_v := to_unsigned(input, 32);
if ((tmp_v and (tmp_v - 1)) = 0) then
return true;
else
return false;
end if;
end if;
end function is_power_of_two_f;

-- Swap all bytes of a 32-bit word (endianness conversion) --------------------------------
-- Swap all bytes of an N*8-bit word (endianness conversion) ------------------------------
-- -------------------------------------------------------------------------------------------
function bswap32_f(input : std_ulogic_vector) return std_ulogic_vector is
function bswap_f(input : std_ulogic_vector) return std_ulogic_vector is
variable output_v : std_ulogic_vector(input'range);
variable j : natural range 0 to input'length/8;
begin
output_v(07 downto 00) := input(31 downto 24);
output_v(15 downto 08) := input(23 downto 16);
output_v(23 downto 16) := input(15 downto 08);
output_v(31 downto 24) := input(07 downto 00);
for i in 0 to (input'length/8)-1 loop
j := ((input'length/8) - 1) - i;
output_v(i*8+7 downto i*8+0) := input(j*8+7 downto j*8+0);
end loop;
return output_v;
end function bswap32_f;
end function bswap_f;

-- Population count (number of set bits) --------------------------------------------------
-- -------------------------------------------------------------------------------------------
function popcount_f(input : std_ulogic_vector) return natural is
variable cnt_v : natural range 0 to input'length;
begin
cnt_v := 0;
for i in input'length-1 downto 0 loop
for i in 0 to input'length-1 loop
if (input(i) = '1') then
cnt_v := cnt_v + 1;
end if;
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_xip.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ begin

when S_BUSY => -- wait for PHY to complete operation
-- ------------------------------------------------------------
xip_rsp_o.data <= bswap32_f(phy_if.rdata); -- convert incrementing byte-read to little-endian
xip_rsp_o.data <= bswap_f(phy_if.rdata); -- convert incrementing byte-read to little-endian
if (phy_if.busy = '0') then
xip_rsp_o.ack <= '1';
arbiter.state_nxt <= S_IDLE;
Expand Down