🐛 [FPU] fix wiring of exception flags (#733)

stnolting · Nov 20, 2023 · c85bec3 · c85bec3
2 parents bfc4b60 + ed15475
commit c85bec3
Show file tree

Hide file tree

Showing 5 changed files with 85 additions and 43 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -32,6 +32,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12
 
 | Date (*dd.mm.yyyy*) | Version | Comment |
 |:-------------------:|:-------:|:--------|
+| 20.11.2023 | 1.9.1.3 | :bug: fix wiring of FPU exception flags; [#733](https://github.com/stnolting/neorv32/pull/733) |
 | 18.11.2023 | 1.9.1.2 | add XIP clock divider to fine-tune SPI frequency; [#731](https://github.com/stnolting/neorv32/pull/731) |
 | 18.11.2023 | 1.9.1.1 | (re-)add SPI high-speed mode, :bug: fix bug in SPI shift register - introduced in v1.9.0.9; [#730](https://github.com/stnolting/neorv32/pull/730) |
 | 14.11.2023 | [**:rocket:1.9.1**](https://github.com/stnolting/neorv32/releases/tag/v1.9.1) | **New release** |

diff --git a/docs/datasheet/cpu.adoc b/docs/datasheet/cpu.adoc
@@ -621,21 +621,16 @@ less hardware resources and features faster context changes. This also implies t
 register file-related load/store or move instructions. The `Zfinx` extension's floating-point unit is controlled
 via dedicated <<_floating_point_csrs>>.
 
+.Fused Multiply-Add and Division Instructions
 [WARNING]
 Fused multiply-add instructions `f[n]m[add/sub].s` are not supported!
 Division `fdiv.s` and square root `fsqrt.s` instructions are not supported yet!
 
+.Subnormal Numbers
 [WARNING]
-Subnormal numbers ("de-normalized" numbers) are not supported by the NEORV32 FPU.
-Subnormal numbers (exponent = 0) are _flushed to zero_ setting them to +/- 0 before entering the
-FPU's processing core. If a computational instruction (like `fmul.s`) generates a subnormal result, the
-result is also flushed to zero during normalization.
-
-[WARNING]
-The `Zfinx` extension is not yet officially ratified, but is expected to stay unchanged. There is no
-software support for the `Zfinx` extension in the upstream GCC RISC-V port yet. However, an
-intrinsic library is provided to utilize the provided `Zfinx` floating-point extension from C-language
-code (see `sw/example/floating_point_test`).
+Subnormal numbers ("de-normalized" numbers, i.e. exponent = 0) are not supported by the NEORV32 FPU.
+Subnormal numbers are _flushed to zero_ setting them to +/- 0 before being processed by **any** FPU operation.
+If a computational instruction generates a subnormal result it is also flushed to zero during normalization.
 
 .Instructions and Timing
 [cols="<2,<4,<3"]

diff --git a/rtl/core/neorv32_cpu_cp_fpu.vhd b/rtl/core/neorv32_cpu_cp_fpu.vhd
@@ -276,6 +276,12 @@ architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is
 
 begin
 
+  -- Sanity Checks --------------------------------------------------------------------------
+  -- -------------------------------------------------------------------------------------------
+  assert false report
+    "[Zfinx] The NEORV32 floating-point unit is still in experimental state." severity warning;
+
+
 -- ****************************************************************************************************************************
 -- Control
 -- ****************************************************************************************************************************
@@ -306,13 +312,12 @@ begin
             csr_fflags <= csr_wdata_i(4 downto 0);
           end if;
         end if;
-      else -- auto-update
+      else -- auto-update ("accumulate" flags)
         csr_fflags <= csr_fflags or fflags;
       end if;
     end if;
   end process csr_write;
 
-
   -- read access --
   csr_read: process(csr_addr_i, csr_fflags, csr_frm)
   begin
@@ -335,7 +340,7 @@ begin
   cmd.instr_f2i    <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "11000") else '0';
   cmd.instr_sgnj   <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "00100") else '0';
   cmd.instr_minmax <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "00101") else '0';
-  cmd.instr_addsub <= '1' when (ctrl_i.ir_funct12(11 downto 8) = "0000")  else '0';
+  cmd.instr_addsub <= '1' when (ctrl_i.ir_funct12(11 downto 8) = "0000" ) else '0';
   cmd.instr_mul    <= '1' when (ctrl_i.ir_funct12(11 downto 7) = "00010") else '0';
 
   -- binary re-encoding --
@@ -351,7 +356,7 @@ begin
 
   -- Input Operands: Check for subnormal numbers (flush to zero) ----------------------------
   -- -------------------------------------------------------------------------------------------
-  -- Subnormal numbers are not supported and are "flushed to zero"! FIXME / TODO
+  -- [WARNING] Subnormal numbers are not supported yet and are "flushed to zero"! FIXME / TODO
   -- rs1 --
   op_data(0)(31)           <= rs1_i(31);
   op_data(0)(30 downto 23) <= rs1_i(30 downto 23);
@@ -362,26 +367,17 @@ begin
   op_data(1)(22 downto 00) <= (others => '0') when (rs2_i(30 downto 23) = "00000000") else rs2_i(22 downto 0); -- flush mantissa to zero if subnormal
 
 
-  -- Number Classifier ----------------------------------------------------------------------
+  -- Number Classifier ----------------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
   number_classifier: process(op_data)
     variable op_m_all_zero_v, op_e_all_zero_v, op_e_all_one_v       : std_ulogic;
     variable op_is_zero_v, op_is_inf_v, op_is_denorm_v, op_is_nan_v : std_ulogic;
   begin
     for i in 0 to 1 loop -- for rs1 and rs2 inputs
       -- check for all-zero/all-one --
-      op_m_all_zero_v := '0';
-      op_e_all_zero_v := '0';
-      op_e_all_one_v  := '0';
-      if (or_reduce_f(op_data(i)(22 downto 00)) = '0') then
-        op_m_all_zero_v := '1';
-      end if;
-      if (or_reduce_f(op_data(i)(30 downto 23)) = '0') then
-        op_e_all_zero_v := '1';
-      end if;
-      if (and_reduce_f(op_data(i)(30 downto 23)) = '1') then
-        op_e_all_one_v  := '1';
-      end if;
+      op_m_all_zero_v := not or_reduce_f(op_data(i)(22 downto 00));
+      op_e_all_zero_v := not or_reduce_f(op_data(i)(30 downto 23));
+      op_e_all_one_v  :=    and_reduce_f(op_data(i)(30 downto 23));
 
       -- check special cases --
       op_is_zero_v   := op_e_all_zero_v and      op_m_all_zero_v;  -- zero
@@ -1449,10 +1445,10 @@ begin
                ctrl.flags(fp_exc_nv_c)) = '1') then -- invalid
             ctrl.state <= S_FINALIZE;
           -- The normalizer only checks the class of the inputs and not the result.
-          -- Check whether adder result is 0.0 which can happen if eg. 1.0 - 1.0
-          -- Set the ctrl.cnt to 0 to force the resulting exponent to be 0
-          -- Do not change sreg.lower as that is already all 0s
-          -- Do not change sign as that should be the right sign from the add/sub
+          -- Check whether adder result is 0.0 which can happen if eg. 1.0 - 1.0.
+          -- Set ctrl.cnt to 0 to force the resulting exponent to be 0.
+          -- Do not change sreg.lower as that is already all 0s.
+          -- Do not change sign as that should be the right sign from the add/sub.
           elsif (unsigned(mantissa_i(47 downto 0)) = 0) then
             ctrl.cnt <= (others => '0');
             ctrl.state <= S_FINALIZE;
@@ -1541,14 +1537,14 @@ begin
                 (sreg.zero = '1') or (ctrl.class(fp_class_neg_denorm_c) = '1') or (ctrl.class(fp_class_pos_denorm_c) = '1') then -- denormalized (flush-to-zero)
             ctrl.res_exp <= fp_single_pos_zero_c(30 downto 23); -- keep original sign
             ctrl.res_man <= fp_single_pos_zero_c(22 downto 00);
-          else -- result is ok
+          else -- result is fine as it is
             ctrl.res_exp <= ctrl.cnt(7 downto 0);
             ctrl.res_man <= sreg.lower;
           end if;
           -- generate exception flags --
           ctrl.flags(fp_exc_nv_c) <= ctrl.flags(fp_exc_nv_c) or ctrl.class(fp_class_snan_c); -- invalid if input is SIGNALING NaN
-          ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexcat if result is rounded
-          --
+          ctrl.flags(fp_exc_nx_c) <= ctrl.flags(fp_exc_nx_c) or ctrl.rounded; -- inexact if result is rounded
+          -- processing done --
           done_o     <= '1';
           ctrl.state <= S_IDLE;
 

diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd
@@ -59,7 +59,7 @@ package neorv32_package is
 
   -- Architecture Constants -----------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090102"; -- hardware version
+  constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090103"; -- hardware version
   constant archid_c     : natural := 19; -- official RISC-V architecture ID
   constant XLEN         : natural := 32; -- native data path width, do not change!
 
@@ -335,11 +335,11 @@ package neorv32_package is
   constant fp_class_qnan_c       : natural := 9; -- quiet NaN (qNaN)
 
   -- exception flags --
-  constant fp_exc_nv_c : natural := 0; -- invalid operation
-  constant fp_exc_dz_c : natural := 1; -- divide by zero
+  constant fp_exc_nx_c : natural := 0; -- inexact
+  constant fp_exc_uf_c : natural := 1; -- underflow
   constant fp_exc_of_c : natural := 2; -- overflow
-  constant fp_exc_uf_c : natural := 3; -- underflow
-  constant fp_exc_nx_c : natural := 4; -- inexact
+  constant fp_exc_dz_c : natural := 3; -- division by zero
+  constant fp_exc_nv_c : natural := 4; -- invalid operation
 
   -- special values (single-precision) --
   constant fp_single_qnan_c     : std_ulogic_vector(31 downto 0) := x"7fc00000"; -- quiet NaN

diff --git a/sw/example/floating_point_test/main.c b/sw/example/floating_point_test/main.c
@@ -69,6 +69,8 @@
 #define SILENT_MODE        (1)
 //** Run FPU CSR tests when != 0 */
 #define RUN_CSR_TESTS      (1)
+//** Run FPU exception tests when != 0 */
+#define RUN_EXC_TESTS      (1)
 //** Run conversion tests when != 0 */
 #define RUN_CONV_TESTS     (1)
 //** Run add/sub tests when != 0 */
@@ -90,6 +92,16 @@
 /**@}*/
 
 
+/**********************************************************************//**
+ * @name Special floating-point encodings
+ **************************************************************************/
+/**@{*/
+#define FLOAT32_SNAN ( (uint32_t)(0x7fa00000U) )
+#define FLOAT32_PMIN ( (uint32_t)(0x00800000U) )
+#define FLOAT32_PMAX ( (uint32_t)(0x7f7fffffU) )
+/**@}*/
+
+
 // Prototypes
 uint32_t get_test_vector(void);
 uint32_t xorshift32(void);
@@ -98,8 +110,9 @@ void print_report(uint32_t num_err);
 
 
 /**********************************************************************//**
- * Main function; test all available operations of the NEORV32 'Zfinx' extensions using bit floating-point
- * hardware intrinsics and software-only reference functions (emulation).
+ * Main function; test all available operations of the NEORV32 'Zfinx'
+ * extensions using floating-point hardware intrinsics and software-only
+ * reference functions (emulation).
  *
  * @note This program requires the Zfinx CPU extension.
  *
@@ -196,8 +209,45 @@ int main() {
   test_cnt++;
 #endif
 
-  // clear FPU status/control word
-  neorv32_cpu_csr_write(CSR_FCSR, 0);
+
+// ----------------------------------------------------------------------------
+// CSR Exception Tests
+// ----------------------------------------------------------------------------
+#if (RUN_EXC_TESTS != 0)
+  neorv32_uart0_printf("\n#%u: FFLAGS.NX (inexact)... <WORK IN PROGRESS>\n", test_cnt);
+  test_cnt++;
+
+  neorv32_uart0_printf("\n#%u: FFLAGS.DZ (divide by zero)... DIVISON NOT SUPPORTED!\n", test_cnt);
+  test_cnt++;
+
+  neorv32_uart0_printf("\n#%u: FFLAGS.UF (underflow)... <WORK IN PROGRESS>\n", test_cnt);
+  test_cnt++;
+
+  neorv32_uart0_printf("\n#%u: FFLAGS.OV (overflow)... <WORK IN PROGRESS>\n", test_cnt);
+  test_cnt++;
+
+  neorv32_uart0_printf("\n#%u: FFLAGS.NV (invalid operation)...\n", test_cnt);
+  err_cnt = 0;
+  for (i=0;i<(uint32_t)NUM_TEST_CASES; i++) {
+    neorv32_cpu_csr_write(CSR_FFLAGS, 0);
+    opa.binary_value = FLOAT32_SNAN; // signaling NAN
+    opb.binary_value = get_test_vector(); // any number
+    res_hw.float_value = riscv_intrinsic_fadds(opa.float_value, opb.float_value); // discard result
+
+    res_sw.binary_value = (uint32_t)(1 << CSR_FFLAGS_NV);
+    res_hw.binary_value = neorv32_cpu_csr_read(CSR_FFLAGS) & (1 << CSR_FFLAGS_NV);
+    err_cnt += verify_result(i, opa.binary_value, opb.binary_value, res_sw.binary_value, res_hw.binary_value);
+  }
+  print_report(err_cnt);
+  err_cnt_total += err_cnt;
+  test_cnt++;
+#endif
+
+
+// ----------------------------------------------------------------------------
+// Initialize FPU hardware
+// ----------------------------------------------------------------------------
+  neorv32_cpu_csr_write(CSR_FCSR, 0); // clear exception flags and set "round to nearest"
 
 
 // ----------------------------------------------------------------------------