Skip to content

Commit

Permalink
🐛 [FPU] fix wiring of exception flags (#733)
Browse files Browse the repository at this point in the history
  • Loading branch information
stnolting committed Nov 20, 2023
2 parents bfc4b60 + ed15475 commit c85bec3
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 43 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12

| Date (*dd.mm.yyyy*) | Version | Comment |
|:-------------------:|:-------:|:--------|
| 20.11.2023 | 1.9.1.3 | :bug: fix wiring of FPU exception flags; [#733](https://github.com/stnolting/neorv32/pull/733) |
| 18.11.2023 | 1.9.1.2 | add XIP clock divider to fine-tune SPI frequency; [#731](https://github.com/stnolting/neorv32/pull/731) |
| 18.11.2023 | 1.9.1.1 | (re-)add SPI high-speed mode, :bug: fix bug in SPI shift register - introduced in v1.9.0.9; [#730](https://github.com/stnolting/neorv32/pull/730) |
| 14.11.2023 | [**:rocket:1.9.1**](https://github.com/stnolting/neorv32/releases/tag/v1.9.1) | **New release** |
Expand Down
15 changes: 5 additions & 10 deletions docs/datasheet/cpu.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -621,21 +621,16 @@ less hardware resources and features faster context changes. This also implies t
register file-related load/store or move instructions. The `Zfinx` extension's floating-point unit is controlled
via dedicated <<_floating_point_csrs>>.

.Fused Multiply-Add and Division Instructions
[WARNING]
Fused multiply-add instructions `f[n]m[add/sub].s` are not supported!
Division `fdiv.s` and square root `fsqrt.s` instructions are not supported yet!

.Subnormal Number
[WARNING]
Subnormal numbers ("de-normalized" numbers) are not supported by the NEORV32 FPU.
Subnormal numbers (exponent = 0) are _flushed to zero_ setting them to +/- 0 before entering the
FPU's processing core. If a computational instruction (like `fmul.s`) generates a subnormal result, the
result is also flushed to zero during normalization.

[WARNING]
The `Zfinx` extension is not yet officially ratified, but is expected to stay unchanged. There is no
software support for the `Zfinx` extension in the upstream GCC RISC-V port yet. However, an
intrinsic library is provided to utilize the provided `Zfinx` floating-point extension from C-language
code (see `sw/example/floating_point_test`).
Subnormal numbers ("de-normalized" numbers, i.e. exponent = 0) are not supported by the NEORV32 FPU.
Subnormal numbers are _flushed to zero_ setting them to +/- 0 before being processed by **any** FPU operation.
If a computational instruction generates a subnormal result it is also flushed to zero during normalization.

.Instructions and Timing
[cols="<2,<4,<3"]
Expand Down
44 changes: 20 additions & 24 deletions rtl/core/neorv32_cpu_cp_fpu.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is

begin

-- Sanity Checks --------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
assert false report
"[Zfinx] The NEORV32 floating-point unit is still in experimental state." severity warning;


-- ****************************************************************************************************************************
-- Control
-- ****************************************************************************************************************************
Expand Down Expand Up @@ -306,13 +312,12 @@ begin
csr_fflags <= csr_wdata_i(4 downto 0);
end if;
end if;
else -- auto-update
else -- auto-update ("accumulate" flags)
csr_fflags <= csr_fflags or fflags;
end if;
end if;
end process csr_write;


-- read access --
csr_read: process(csr_addr_i, csr_fflags, csr_frm)
begin
Expand All @@ -335,7 +340,7 @@ begin
cmd.instr_f2i <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "11000") else '0';
cmd.instr_sgnj <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "00100") else '0';
cmd.instr_minmax <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "00101") else '0';
cmd.instr_addsub <= '1' when (ctrl_i.ir_funct12(11 downto 8) = "0000") else '0';
cmd.instr_addsub <= '1' when (ctrl_i.ir_funct12(11 downto 8) = "0000" ) else '0';
cmd.instr_mul <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "00010") else '0';

-- binary re-encoding --
Expand All @@ -351,7 +356,7 @@ begin

-- Input Operands: Check for subnormal numbers (flush to zero) ----------------------------
-- -------------------------------------------------------------------------------------------
-- Subnormal numbers are not supported and are "flushed to zero"! FIXME / TODO
-- [WARNING] Subnormal numbers are not supported yet and are "flushed to zero"! FIXME / TODO
-- rs1 --
op_data(0)(31) <= rs1_i(31);
op_data(0)(30 downto 23) <= rs1_i(30 downto 23);
Expand All @@ -362,26 +367,17 @@ begin
op_data(1)(22 downto 00) <= (others => '0') when (rs2_i(30 downto 23) = "00000000") else rs2_i(22 downto 0); -- flush mantissa to zero if subnormal


-- Number Classifier ----------------------------------------------------------------------
-- Number Classifier ----------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
number_classifier: process(op_data)
variable op_m_all_zero_v, op_e_all_zero_v, op_e_all_one_v : std_ulogic;
variable op_is_zero_v, op_is_inf_v, op_is_denorm_v, op_is_nan_v : std_ulogic;
begin
for i in 0 to 1 loop -- for rs1 and rs2 inputs
-- check for all-zero/all-one --
op_m_all_zero_v := '0';
op_e_all_zero_v := '0';
op_e_all_one_v := '0';
if (or_reduce_f(op_data(i)(22 downto 00)) = '0') then
op_m_all_zero_v := '1';
end if;
if (or_reduce_f(op_data(i)(30 downto 23)) = '0') then
op_e_all_zero_v := '1';
end if;
if (and_reduce_f(op_data(i)(30 downto 23)) = '1') then
op_e_all_one_v := '1';
end if;
op_m_all_zero_v := not or_reduce_f(op_data(i)(22 downto 00));
op_e_all_zero_v := not or_reduce_f(op_data(i)(30 downto 23));
op_e_all_one_v := and_reduce_f(op_data(i)(30 downto 23));

-- check special cases --
op_is_zero_v := op_e_all_zero_v and op_m_all_zero_v; -- zero
Expand Down Expand Up @@ -1449,10 +1445,10 @@ begin
ctrl.flags(fp_exc_nv_c)) = '1') then -- invalid
ctrl.state <= S_FINALIZE;
-- The normalizer only checks the class of the inputs and not the result.
-- Check whether adder result is 0.0 which can happen if eg. 1.0 - 1.0
-- Set the ctrl.cnt to 0 to force the resulting exponent to be 0
-- Do not change sreg.lower as that is already all 0s
-- Do not change sign as that should be the right sign from the add/sub
-- Check whether adder result is 0.0 which can happen if eg. 1.0 - 1.0.
-- Set ctrl.cnt to 0 to force the resulting exponent to be 0.
-- Do not change sreg.lower as that is already all 0s.
-- Do not change sign as that should be the right sign from the add/sub.
elsif (unsigned(mantissa_i(47 downto 0)) = 0) then
ctrl.cnt <= (others => '0');
ctrl.state <= S_FINALIZE;
Expand Down Expand Up @@ -1541,14 +1537,14 @@ begin
(sreg.zero = '1') or (ctrl.class(fp_class_neg_denorm_c) = '1') or (ctrl.class(fp_class_pos_denorm_c) = '1') then -- denormalized (flush-to-zero)
ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23); -- keep original sign
ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
else -- result is ok
else -- result is fine as it is
ctrl.res_exp <= ctrl.cnt(7 downto 0);
ctrl.res_man <= sreg.lower;
end if;
-- generate exception flags --
ctrl.flags(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c) or ctrl.class(fp_class_snan_c); -- invalid if input is SIGNALING NaN
ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexcat if result is rounded
--
ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexact if result is rounded
-- processing done --
done_o <= '1';
ctrl.state <= S_IDLE;

Expand Down
10 changes: 5 additions & 5 deletions rtl/core/neorv32_package.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ package neorv32_package is

-- Architecture Constants -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090102"; -- hardware version
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090103"; -- hardware version
constant archid_c : natural := 19; -- official RISC-V architecture ID
constant XLEN : natural := 32; -- native data path width, do not change!

Expand Down Expand Up @@ -335,11 +335,11 @@ package neorv32_package is
constant fp_class_qnan_c : natural := 9; -- quiet NaN (qNaN)

-- exception flags --
constant fp_exc_nv_c : natural := 0; -- invalid operation
constant fp_exc_dz_c : natural := 1; -- divide by zero
constant fp_exc_nx_c : natural := 0; -- inexact
constant fp_exc_uf_c : natural := 1; -- underflow
constant fp_exc_of_c : natural := 2; -- overflow
constant fp_exc_uf_c : natural := 3; -- underflow
constant fp_exc_nx_c : natural := 4; -- inexact
constant fp_exc_dz_c : natural := 3; -- division by zero
constant fp_exc_nv_c : natural := 4; -- invalid operation

-- special values (single-precision) --
constant fp_single_qnan_c : std_ulogic_vector(31 downto 0) := x"7fc00000"; -- quiet NaN
Expand Down
58 changes: 54 additions & 4 deletions sw/example/floating_point_test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
#define SILENT_MODE (1)
//** Run FPU CSR tests when != 0 */
#define RUN_CSR_TESTS (1)
//** Run FPU exception tests when != 0 */
#define RUN_EXC_TESTS (1)
//** Run conversion tests when != 0 */
#define RUN_CONV_TESTS (1)
//** Run add/sub tests when != 0 */
Expand All @@ -90,6 +92,16 @@
/**@}*/


/**********************************************************************//**
* @name Special floating-point encodings
*
* Raw IEEE 754 single-precision (binary32) bit patterns, given as 32-bit
* words so they can be assigned to the binary view of a test operand.
**************************************************************************/
/**@{*/
/** Signaling NaN: exponent all ones, mantissa MSB (quiet bit) clear, non-zero payload */
#define FLOAT32_SNAN ( (uint32_t)(0x7fa00000U) )
/** Smallest positive normal number: exponent = 1, mantissa = 0 */
#define FLOAT32_PMIN ( (uint32_t)(0x00800000U) )
/** Largest positive finite number: exponent = 0xFE, mantissa all ones */
#define FLOAT32_PMAX ( (uint32_t)(0x7f7fffffU) )
/**@}*/


// Prototypes
uint32_t get_test_vector(void);
uint32_t xorshift32(void);
Expand All @@ -98,8 +110,9 @@ void print_report(uint32_t num_err);


/**********************************************************************//**
* Main function; test all available operations of the NEORV32 'Zfinx' extensions using bit floating-point
* hardware intrinsics and software-only reference functions (emulation).
* Main function; test all available operations of the NEORV32 'Zfinx'
* extensions using floating-point hardware intrinsics and software-only
* reference functions (emulation).
*
* @note This program requires the Zfinx CPU extension.
*
Expand Down Expand Up @@ -196,8 +209,45 @@ int main() {
test_cnt++;
#endif

// clear FPU status/control word
neorv32_cpu_csr_write(CSR_FCSR, 0);

// ----------------------------------------------------------------------------
// CSR Exception Tests
// ----------------------------------------------------------------------------
#if (RUN_EXC_TESTS != 0)
neorv32_uart0_printf("\n#%u: FFLAGS.NX (inexact)... <WORK IN PROGRESS>\n", test_cnt);
test_cnt++;

neorv32_uart0_printf("\n#%u: FFLAGS.DZ (divide by zero)... DIVISON NOT SUPPORTED!\n", test_cnt);
test_cnt++;

neorv32_uart0_printf("\n#%u: FFLAGS.UF (underflow)... <WORK IN PROGRESS>\n", test_cnt);
test_cnt++;

neorv32_uart0_printf("\n#%u: FFLAGS.OV (overflow)... <WORK IN PROGRESS>\n", test_cnt);
test_cnt++;

neorv32_uart0_printf("\n#%u: FFLAGS.NV (invalid operation)...\n", test_cnt);
err_cnt = 0;
for (i=0;i<(uint32_t)NUM_TEST_CASES; i++) {
neorv32_cpu_csr_write(CSR_FFLAGS, 0);
opa.binary_value = FLOAT32_SNAN; // signaling NAN
opb.binary_value = get_test_vector(); // any number
res_hw.float_value = riscv_intrinsic_fadds(opa.float_value, opb.float_value); // discard result

res_sw.binary_value = (uint32_t)(1 << CSR_FFLAGS_NV);
res_hw.binary_value = neorv32_cpu_csr_read(CSR_FFLAGS) & (1 << CSR_FFLAGS_NV);
err_cnt += verify_result(i, opa.binary_value, opb.binary_value, res_sw.binary_value, res_hw.binary_value);
}
print_report(err_cnt);
err_cnt_total += err_cnt;
test_cnt++;
#endif


// ----------------------------------------------------------------------------
// Initialize FPU hardware
// ----------------------------------------------------------------------------
neorv32_cpu_csr_write(CSR_FCSR, 0); // clear exception flags and set "round to nearest"


// ----------------------------------------------------------------------------
Expand Down

0 comments on commit c85bec3

Please sign in to comment.