diff --git a/CHANGELOG.md b/CHANGELOG.md index 89ec74191..40d673ae2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Link | |:----:|:-------:|:--------|:----:| +| 17.02.2024 | 1.9.5.3 | :warning: reworked CPU's hardware performance monitor (HPMs) events | [#811](https://github.com/stnolting/neorv32/pull/811) | | 16.02.2024 | 1.9.5.2 | :warning: **revert** support for page faults (keep that in mmu branch for now) | [#809](https://github.com/stnolting/neorv32/pull/809) | | 16.02.2024 | 1.9.5.1 | :sparkles: add two new generics to exclude certain PMP modes from synthesis | [#808](https://github.com/stnolting/neorv32/pull/808) | | 16.02.2024 | [**:rocket:1.9.5**](https://github.com/stnolting/neorv32/releases/tag/v1.9.5) | **New release** | | diff --git a/docs/datasheet/cpu_csr.adoc b/docs/datasheet/cpu_csr.adoc index 613e94142..8bcf817c5 100644 --- a/docs/datasheet/cpu_csr.adoc +++ b/docs/datasheet/cpu_csr.adoc @@ -757,23 +757,28 @@ cycle even if more than one trigger event is observed. [cols="^1,^3,^1,<9"] [options="header",grid="rows"] |======================= -| Bit | Name [C] | R/W | Event Description -| 0 | `HPMCNT_EVENT_CY` | r/w | active clock cycle (CPU not in sleep mode) -| 1 | - | r/- | _not implemented, always read as zero_ -| 2 | `HPMCNT_EVENT_IR` | r/w | retired instruction (compressed or uncompressed) -| 3 | `HPMCNT_EVENT_CIR` | r/w | retired compressed instruction -| 4 | `HPMCNT_EVENT_WAIT_IF` | r/w | instruction fetch memory wait cycle -| 5 | `HPMCNT_EVENT_WAIT_II` | r/w | instruction issue pipeline wait cycle -| 6 | `HPMCNT_EVENT_WAIT_MC` | r/w | multi-cycle ALU operation wait cycle (like iterative shift operation) -| 7 | `HPMCNT_EVENT_LOAD` | r/w | memory data load operation -| 8 | `HPMCNT_EVENT_STORE` | r/w | memory data store operation -| 9 | `HPMCNT_EVENT_WAIT_LS` | r/w | load/store memory wait cycle -| 10 | `HPMCNT_EVENT_JUMP` | r/w | unconditional jump / jump-and-link -| 11 | `HPMCNT_EVENT_BRANCH` | r/w | conditional branch (_taken_ or _not taken_) -| 12 | `HPMCNT_EVENT_TBRANCH` | r/w | _taken_ conditional branch -| 13 | `HPMCNT_EVENT_TRAP` | r/w | entered trap (synchronous exception or interrupt) -| 14 | `HPMCNT_EVENT_ILLEGAL` | r/w | illegal instruction exception -|======================= +| Bit | Name [C] | R/W | Event Description +4+^| **RISC-V-compatible** +| 0 | `HPMCNT_EVENT_CY` | r/w | active clock cycle (CPU not in <<_sleep_mode>>) +| 1 | `HPMCNT_EVENT_TM` | r/- | _not implemented_, hardwired to zero +| 2 | `HPMCNT_EVENT_IR` | r/w | any executed instruction (16-bit/compressed or 32-bit/uncompressed) +4+^| **NEORV32-specific** +| 3 | `HPMCNT_EVENT_COMPR` | r/w | any executed 16-bit/compressed (<<_c_isa_extension>>) instruction +| 4 | `HPMCNT_EVENT_WAIT_DIS` | r/w | instruction dispatch wait cycle (wait for instruction prefetch-buffer refill (<<_cpu_control_unit>> IPB); +caused by a fence instruction, a control flow transfer or an instruction fetch bus wait cycle) +| 5 | `HPMCNT_EVENT_WAIT_ALU` | r/w | any delay/wait cycle caused by a _multi-cycle_ <<_cpu_arithmetic_logic_unit>> operation +| 6 | `HPMCNT_EVENT_BRANCH` | r/w | any executed branch instruction (unconditional, conditional-taken or conditional-not-taken) +| 7 | `HPMCNT_EVENT_BRANCHED` | r/w | any control transfer operation (unconditional jump, taken conditional branch or trap entry/exit) +| 8 | `HPMCNT_EVENT_LOAD` | r/w | any executed load operation (including atomic memory operations, <<_a_isa_extension>>)
+| 9 | `HPMCNT_EVENT_STORE` | r/w | any executed store operation (including atomic memory operations, <<_a_isa_extension>>) +| 10 | `HPMCNT_EVENT_WAIT_LSU` | r/w | any memory/bus/cache/etc. delay/wait cycle while executing any load or store operation (caused by a data bus wait cycle) +| 11 | `HPMCNT_EVENT_TRAP` | r/w | starting processing of any trap (<<_traps_exceptions_and_interrupts>>) +|======================= + +.Instruction Retiring ("Retired == Executed") +[IMPORTANT] +The CPU HPM/counter logic treats all executed instructions as "retired" even if they raise an exception, +cause an interrupt, trigger a privilege mode change or were not meant to retire (according to the RISC-V spec.). {empty} + diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index bc624b125..ecdbf3428 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -132,13 +132,12 @@ architecture neorv32_cpu_control_rtl of neorv32_cpu_control is -- instruction fetch engine -- type fetch_engine_state_t is (IF_RESTART, IF_REQUEST, IF_PENDING); type fetch_engine_t is record - state : fetch_engine_state_t; - state_prev : fetch_engine_state_t; - restart : std_ulogic; -- buffered restart request (after branch) - pc : std_ulogic_vector(XLEN-1 downto 0); - reset : std_ulogic; -- restart request (after branch) - resp : std_ulogic; -- bus response - priv : std_ulogic; -- fetch privilege level + state : fetch_engine_state_t; + restart : std_ulogic; -- buffered restart request (after branch) + pc : std_ulogic_vector(XLEN-1 downto 0); + reset : std_ulogic; -- restart request (after branch) + resp : std_ulogic; -- bus response + priv : std_ulogic; -- fetch privilege level end record; signal fetch_engine : fetch_engine_t; @@ -190,8 +189,6 @@ architecture neorv32_cpu_control_rtl of neorv32_cpu_control is type execute_engine_t is record state : execute_engine_state_t; state_nxt : execute_engine_state_t; - state_prev : execute_engine_state_t; - state_prev2 : execute_engine_state_t; ir : std_ulogic_vector(31 downto 0); ir_nxt : std_ulogic_vector(31 downto 0); is_ci : std_ulogic; -- current instruction is de-compressed instruction @@ -359,15 +356,11 @@ begin fetch_engine_fsm: process(rstn_i, clk_i) begin if (rstn_i = '0') then - fetch_engine.state <= IF_RESTART; - fetch_engine.state_prev <= IF_RESTART; - fetch_engine.restart <= '1'; -- set to reset IPB - fetch_engine.pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address - fetch_engine.priv <= priv_mode_m_c; -- start in machine mode + fetch_engine.state <= IF_RESTART; + fetch_engine.restart <= '1'; -- set to reset IPB + fetch_engine.pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address + fetch_engine.priv <= priv_mode_m_c; -- start in machine mode elsif rising_edge(clk_i) then - -- previous state (for HPMs only) -- - fetch_engine.state_prev <= fetch_engine.state; - -- restart request -- if (fetch_engine.state = IF_RESTART) then -- restart done fetch_engine.restart <= '0'; @@ -620,25 +613,21 @@ begin execute_engine_fsm_sync: process(rstn_i, clk_i) begin if (rstn_i = '0') then - ctrl <= ctrl_bus_zero_c; - execute_engine.state <= RESTART; - execute_engine.state_prev <= RESTART; - execute_engine.state_prev2 <= RESTART; - execute_engine.ir <= (others => '0'); - execute_engine.is_ci <= '0'; - execute_engine.pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address - execute_engine.next_pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address - execute_engine.link_pc <=
CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address + ctrl <= ctrl_bus_zero_c; + execute_engine.state <= RESTART; + execute_engine.ir <= (others => '0'); + execute_engine.is_ci <= '0'; + execute_engine.pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address + execute_engine.next_pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address + execute_engine.link_pc <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address elsif rising_edge(clk_i) then -- control bus -- ctrl <= ctrl_nxt; -- execute engine arbiter -- - execute_engine.state <= execute_engine.state_nxt; - execute_engine.state_prev <= execute_engine.state; - execute_engine.state_prev2 <= execute_engine.state_prev; - execute_engine.ir <= execute_engine.ir_nxt; - execute_engine.is_ci <= execute_engine.is_ci_nxt; + execute_engine.state <= execute_engine.state_nxt; + execute_engine.ir <= execute_engine.ir_nxt; + execute_engine.is_ci <= execute_engine.is_ci_nxt; -- current PC: address of instruction being executed -- if (execute_engine.pc_we = '1') then @@ -2358,29 +2347,25 @@ begin ((csr.privilege = priv_mode_m_c) and (csr.mcyclecfg_minh = '0')) or -- not inhibited when in machine-mode ((csr.privilege = priv_mode_u_c) and (csr.mcyclecfg_uinh = '0')) -- not inhibited when in user-mode ) else '0'; - cnt_event(hpmcnt_event_ir_c) <= '1' when (execute_engine.state = EXECUTE) and ( -- retired (=executed) instruction + cnt_event(hpmcnt_event_tm_c) <= '0'; -- unused/reserved (time) + cnt_event(hpmcnt_event_ir_c) <= '1' when (execute_engine.state = EXECUTE) and ( -- retired (==executed) instruction ((csr.privilege = priv_mode_m_c) and (csr.minstretcfg_minh = '0')) or -- not inhibited when in machine-mode ((csr.privilege = priv_mode_u_c) and (csr.minstretcfg_uinh = '0')) -- not inhibited when in user-mode ) else '0'; - cnt_event(hpmcnt_event_tm_c) <= '0'; -- unused/reserved (time) -- NEORV32-specific counter events (for HPM counters only) -- - cnt_event(hpmcnt_event_cir_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- executed compressed instruction - cnt_event(hpmcnt_event_wait_if_c) <= '1' when (fetch_engine.state = IF_PENDING) and (fetch_engine.state_prev = IF_PENDING) else '0'; -- instruction fetch memory wait cycle - cnt_event(hpmcnt_event_wait_ii_c) <= '1' when (execute_engine.state = DISPATCH) and (execute_engine.state_prev = DISPATCH) else '0'; -- instruction issue wait cycle - cnt_event(hpmcnt_event_wait_mc_c) <= '1' when (execute_engine.state = ALU_WAIT) else '0'; -- multi-cycle alu-operation wait cycle - - cnt_event(hpmcnt_event_load_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '0') else '0'; -- load operation - cnt_event(hpmcnt_event_store_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '1') else '0'; -- store operation - cnt_event(hpmcnt_event_wait_ls_c) <= '1' when (execute_engine.state = MEM_WAIT) and (execute_engine.state_prev2 = MEM_WAIT) else '0'; -- load/store memory wait cycle - - cnt_event(hpmcnt_event_jump_c) <= '1' when (execute_engine.state = BRANCH) and (execute_engine.ir(instr_opcode_lsb_c+2) = '1') else '0'; -- jump (unconditional) - cnt_event(hpmcnt_event_branch_c) <= '1' when (execute_engine.state = BRANCH) and (execute_engine.ir(instr_opcode_lsb_c+2) = '0') else '0'; -- branch (conditional, taken or not taken) - cnt_event(hpmcnt_event_tbranch_c) <= '1' when (execute_engine.state = BRANCHED) and (execute_engine.state_prev = BRANCH) and - (execute_engine.ir(instr_opcode_lsb_c+2) = '0') 
else '0'; -- taken branch (conditional) - - cnt_event(hpmcnt_event_trap_c) <= '1' when (trap_ctrl.env_enter = '1') else '0'; -- entered trap - cnt_event(hpmcnt_event_illegal_c) <= '1' when (trap_ctrl.env_enter = '1') and (trap_ctrl.cause = trap_iil_c) else '0'; -- illegal operation + cnt_event(hpmcnt_event_compr_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- executed compressed instruction + cnt_event(hpmcnt_event_wait_dis_c) <= '1' when (execute_engine.state = DISPATCH) and (issue_engine.valid = "00") else '0'; -- instruction dispatch wait cycle + cnt_event(hpmcnt_event_wait_alu_c) <= '1' when (execute_engine.state = ALU_WAIT) else '0'; -- multi-cycle ALU co-processor wait cycle + + cnt_event(hpmcnt_event_branch_c) <= '1' when (execute_engine.state = BRANCH) else '0'; -- executed branch instruction + cnt_event(hpmcnt_event_branched_c) <= '1' when (execute_engine.state = BRANCHED) else '0'; -- control flow transfer + + cnt_event(hpmcnt_event_load_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '0') else '0'; -- executed load operation + cnt_event(hpmcnt_event_store_c) <= '1' when (ctrl.lsu_req = '1') and (ctrl.lsu_rw = '1') else '0'; -- executed store operation + cnt_event(hpmcnt_event_wait_lsu_c) <= '1' when (ctrl.lsu_req = '0') and (execute_engine.state = MEM_WAIT) else '0'; -- load/store unit memory wait cycle + + cnt_event(hpmcnt_event_trap_c) <= '1' when (trap_ctrl.env_enter = '1') else '0'; -- entered trap -- **************************************************************************************************************************** diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index d775a51c7..a88309d3b 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -53,7 +53,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090502"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090503"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width @@ -688,25 +688,24 @@ package neorv32_package is constant priv_mode_m_c : std_ulogic := '1'; -- machine mode constant priv_mode_u_c : std_ulogic := '0'; -- user mode - -- HPM Event System ----------------------------------------------------------------------- + -- HPM Events ----------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hpmcnt_event_cy_c : natural := 0; -- Active cycle - constant hpmcnt_event_tm_c : natural := 1; -- Time (unused/reserved) - constant hpmcnt_event_ir_c : natural := 2; -- Retired instruction - constant hpmcnt_event_cir_c : natural := 3; -- Retired compressed instruction - constant hpmcnt_event_wait_if_c : natural := 4; -- Instruction fetch memory wait cycle - constant hpmcnt_event_wait_ii_c : natural := 5; -- Instruction issue wait cycle - constant hpmcnt_event_wait_mc_c : natural := 6; -- Multi-cycle ALU-operation wait cycle - constant hpmcnt_event_load_c : natural := 7; -- Load operation - constant hpmcnt_event_store_c : natural := 8; -- Store operation - constant hpmcnt_event_wait_ls_c : natural := 9; -- Load/store memory wait cycle - constant 
hpmcnt_event_jump_c : natural := 10; -- Unconditional jump - constant hpmcnt_event_branch_c : natural := 11; -- Conditional branch (taken or not taken) - constant hpmcnt_event_tbranch_c : natural := 12; -- Conditional taken branch - constant hpmcnt_event_trap_c : natural := 13; -- Entered trap - constant hpmcnt_event_illegal_c : natural := 14; -- Illegal instruction exception + -- RISC-V-compliant -- + constant hpmcnt_event_cy_c : natural := 0; -- active cycle + constant hpmcnt_event_tm_c : natural := 1; -- time (unused/reserved) + constant hpmcnt_event_ir_c : natural := 2; -- retired instruction + -- NEORV32-specific -- + constant hpmcnt_event_compr_c : natural := 3; -- executed compressed instruction + constant hpmcnt_event_wait_dis_c : natural := 4; -- instruction dispatch wait cycle + constant hpmcnt_event_wait_alu_c : natural := 5; -- multi-cycle ALU co-processor wait cycle + constant hpmcnt_event_branch_c : natural := 6; -- executed branch instruction + constant hpmcnt_event_branched_c : natural := 7; -- control flow transfer + constant hpmcnt_event_load_c : natural := 8; -- load operation + constant hpmcnt_event_store_c : natural := 9; -- store operation + constant hpmcnt_event_wait_lsu_c : natural := 10; -- load-store unit memory wait cycle + constant hpmcnt_event_trap_c : natural := 11; -- entered trap -- - constant hpmcnt_event_size_c : natural := 15; -- length of this list + constant hpmcnt_event_size_c : natural := 12; -- length of this list -- **************************************************************************************************************************** -- Helper Functions diff --git a/sw/example/coremark/core_portme.c b/sw/example/coremark/core_portme.c index ea4ff87f4..94af997d3 100644 --- a/sw/example/coremark/core_portme.c +++ b/sw/example/coremark/core_portme.c @@ -16,7 +16,7 @@ limitations under the License. 
Original Author: Shay Gal-on */ -/* Modified for the NEORV32 Processor - by Stephan Nolting */ +/* Adjusted for the NEORV32 RISC-V Processor by Stephan Nolting */ #include "coremark.h" #include "core_portme.h" @@ -152,18 +152,15 @@ void portable_init(core_portable *p, int *argc, char *argv[]) { neorv32_cpu_set_mcycle(0); // try to setup as many HPMs as possible - if (num_hpm_cnts_global > 0) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_CIR); } - if (num_hpm_cnts_global > 1) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_IF); } - if (num_hpm_cnts_global > 2) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II); } - if (num_hpm_cnts_global > 3) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_WAIT_MC); } - if (num_hpm_cnts_global > 4) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_LOAD); } - if (num_hpm_cnts_global > 5) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_STORE); } - if (num_hpm_cnts_global > 6) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_WAIT_LS); } - if (num_hpm_cnts_global > 7) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_JUMP); } - if (num_hpm_cnts_global > 8) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_BRANCH); } - if (num_hpm_cnts_global > 9) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TBRANCH); } - if (num_hpm_cnts_global > 10) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_TRAP); } - if (num_hpm_cnts_global > 11) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER14, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT14, 1 << HPMCNT_EVENT_ILLEGAL); } + if (num_hpm_cnts_global > 0) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_COMPR); } + if (num_hpm_cnts_global > 1) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_DIS); } + if (num_hpm_cnts_global > 2) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_ALU); } + if (num_hpm_cnts_global > 3) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_BRANCH); } + if (num_hpm_cnts_global > 4) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_BRANCHED); } + if (num_hpm_cnts_global > 5) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_LOAD); } + if (num_hpm_cnts_global > 6) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_STORE); } + if (num_hpm_cnts_global > 7) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_WAIT_LSU); } + if (num_hpm_cnts_global > 8) {neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_TRAP); } neorv32_uart0_printf("NEORV32: Processor running at %u Hz\n", (uint32_t)NEORV32_SYSINFO->CLK); neorv32_uart0_printf("NEORV32: Executing coremark (%u iterations). 
This may take some time...\n\n", (uint32_t)ITERATIONS); @@ -194,20 +191,17 @@ void portable_fini(core_portable *p) { p->portable_id = 0; neorv32_uart0_printf("\nNEORV32: Hardware Performance Monitors (low words only)\n"); - neorv32_uart0_printf(" > Active clock cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MCYCLE)); - neorv32_uart0_printf(" > Retired instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MINSTRET)); + neorv32_uart0_printf(" > Active clock cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MCYCLE)); + neorv32_uart0_printf(" > Retired instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MINSTRET)); if (num_hpm_cnts_global == 0) {neorv32_uart0_printf("no HPMs available\n"); } - if (num_hpm_cnts_global > 0) {neorv32_uart0_printf(" > Retired compr. instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } - if (num_hpm_cnts_global > 1) {neorv32_uart0_printf(" > Instr.-fetch wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } - if (num_hpm_cnts_global > 2) {neorv32_uart0_printf(" > Instr.-issue wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } - if (num_hpm_cnts_global > 3) {neorv32_uart0_printf(" > Multi-cycle ALU wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } - if (num_hpm_cnts_global > 4) {neorv32_uart0_printf(" > Load operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } - if (num_hpm_cnts_global > 5) {neorv32_uart0_printf(" > Store operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } - if (num_hpm_cnts_global > 6) {neorv32_uart0_printf(" > Load/store wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } - if (num_hpm_cnts_global > 7) {neorv32_uart0_printf(" > Unconditional jumps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } - if (num_hpm_cnts_global > 8) {neorv32_uart0_printf(" > Conditional branches (all): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } - if (num_hpm_cnts_global > 9) {neorv32_uart0_printf(" > Conditional branches (taken): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); } - if (num_hpm_cnts_global > 10) {neorv32_uart0_printf(" > Entered traps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); } - if (num_hpm_cnts_global > 11) {neorv32_uart0_printf(" > Illegal operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER14)); } + if (num_hpm_cnts_global > 0) {neorv32_uart0_printf(" > Compressed instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } + if (num_hpm_cnts_global > 1) {neorv32_uart0_printf(" > Instr. dispatch wait cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } + if (num_hpm_cnts_global > 2) {neorv32_uart0_printf(" > ALU wait cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } + if (num_hpm_cnts_global > 3) {neorv32_uart0_printf(" > Branch instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } + if (num_hpm_cnts_global > 4) {neorv32_uart0_printf(" > Control flow transfers : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } + if (num_hpm_cnts_global > 5) {neorv32_uart0_printf(" > Load instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } + if (num_hpm_cnts_global > 6) {neorv32_uart0_printf(" > Store instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } + if (num_hpm_cnts_global > 7) {neorv32_uart0_printf(" > Load/store wait cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } + if (num_hpm_cnts_global > 8) {neorv32_uart0_printf(" > Entered traps : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } neorv32_uart0_printf("\n"); } diff --git a/sw/example/coremark/core_portme.h b/sw/example/coremark/core_portme.h index 1c763bd56..648efd95b 100644 --- a/sw/example/coremark/core_portme.h +++ b/sw/example/coremark/core_portme.h @@ -33,7 +33,7 @@ Original Author: Shay Gal-on /************************/ #define BAUD_RATE (19200) #define ITERATIONS (2000) -#define FLAGS_STR "-> default, see makefile" // compiler optimization +#define FLAGS_STR "see makefile" /************************/ /* Data types and settings */ diff --git a/sw/example/demo_hpm/main.c b/sw/example/demo_hpm/main.c index ecb398409..b6ee5b86a 100644 --- a/sw/example/demo_hpm/main.c +++ b/sw/example/demo_hpm/main.c @@ -3,7 +3,7 @@ // # ********************************************************************************************* # // # BSD 3-Clause License # // # # -// # Copyright (c) 2023, Stephan Nolting. All rights reserved. # +// # Copyright (c) 2024, Stephan Nolting. All rights reserved.
# // # # // # Redistribution and use in source and binary forms, with or without modification, are # // # permitted provided that the following conditions are met: # @@ -85,13 +85,13 @@ int main() { // intro neorv32_uart0_printf("\n<<< NEORV32 Hardware Performance Monitors (HPMs) Example Program >>>\n\n"); - neorv32_uart0_printf("NOTE: This program will use up to 12 HPM counters (if available).\n\n"); + neorv32_uart0_printf("[NOTE] This program will use up to 9 HPM counters (if available).\n\n"); // show HPM hardware configuration uint32_t hpm_num = neorv32_cpu_hpm_get_num_counters(); uint32_t hpm_width = neorv32_cpu_hpm_get_size(); - neorv32_uart0_printf("Check: %u HPM counters detected, each %u bits wide\n", hpm_num, hpm_width); + neorv32_uart0_printf("%u HPM counters detected, each %u bits wide\n", hpm_num, hpm_width); // stop all CPU counters including HPMs @@ -101,42 +101,41 @@ int main() { // clear HPM counters (low and high word); // there will be NO exception if we access a HPM counter register that has not been implemented // as long as Zihpm is implemented - neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER3H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER4H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER5H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER6H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER7H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER8H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER9H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER10H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER11H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER12H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER13H, 0); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER14, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER14H, 0); + if (hpm_num > 0) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER3H, 0); } + if (hpm_num > 1) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER4H, 0); } + if (hpm_num > 2) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER5H, 0); } + if (hpm_num > 3) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER6H, 0); } + if (hpm_num > 4) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER7H, 0); } + if (hpm_num > 5) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER8H, 0); } + if (hpm_num > 6) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER9H, 0); } + if (hpm_num > 7) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER10H, 0); } + if (hpm_num > 8) { neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMCOUNTER11H, 0); } // NOTE regarding HPMs 0..2, which are not "actual" HPMs - // HPM 0 is the machine cycle counter - // HPM 1 is the machine system timer - // HPM 2 is the machine instret counter - // these "HPMs" have fixed event configurations; however, these according events can also be used for any - // other "real" HPM + // - HPM 0 is the machine cycle counter + // - 
HPM 1 is the machine system timer + // - HPM 2 is the machine instret counter + // these counters have fixed event configurations; however, the corresponding events can also be used for any other "real" HPM + + // setup base counters if available + if ((neorv32_cpu_csr_read(CSR_MXISA) & (1 << CSR_MXISA_ZICNTR))) { + neorv32_cpu_csr_write(CSR_MCYCLE, 0); neorv32_cpu_csr_write(CSR_MCYCLEH, 0); + neorv32_cpu_csr_write(CSR_MINSTRET, 0); neorv32_cpu_csr_write(CSR_MINSTRETH, 0); + } // configure events - one event per counter; // we can also configure more than one event; the HPM will increment if _any_ event triggers (logical OR); // there will be NO exception if we access a HPM event register that has not been implemented // as long as Zihpm is implemented - neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_CIR); // retired compressed instruction - neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_IF); // instruction fetch wait (due to high bus traffic) - neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II); // instruction issue wait (due to empty instruction-prefetch buffer) - neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_WAIT_MC); // wait for multi-cycle ALU operation - neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_LOAD); // executed memory LOAD - neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_STORE); // execute memory STORE - neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_WAIT_LS); // memory access wait (due to high bus traffic) - neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_JUMP); // jump (conditional or unconditional) - neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_BRANCH); // condition branch (taken or not taken) - neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TBRANCH); // taken conditional branch - neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_TRAP); // entered trap (exception or interrupt) - neorv32_cpu_csr_write(CSR_MHPMEVENT14, 1 << HPMCNT_EVENT_ILLEGAL); // executed illegal instruction + if (hpm_num > 0) { neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_COMPR); } // executed compressed instruction + if (hpm_num > 1) { neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_DIS); } // instruction dispatch wait cycle + if (hpm_num > 2) { neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_ALU); } // multi-cycle ALU co-processor wait cycle + if (hpm_num > 3) { neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_BRANCH); } // executed branch instruction + if (hpm_num > 4) { neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_BRANCHED); } // control flow transfer + if (hpm_num > 5) { neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_LOAD); } // executed load operation + if (hpm_num > 6) { neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_STORE); } // executed store operation + if (hpm_num > 7) { neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_WAIT_LSU); } // load-store unit memory wait cycle + if (hpm_num > 8) { neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_TRAP); } // entered trap // enable all CPU counters including HPMs @@ -161,21 +160,22 @@ int main() { // print HPM counter values (low word only) - neorv32_uart0_printf("\nHPM results:\n"); - if (hpm_num > 0) { neorv32_uart0_printf("HPM03.low (compr. instr.)
= %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } - if (hpm_num > 1) { neorv32_uart0_printf("HPM04.low (I-fetch waits) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } - if (hpm_num > 2) { neorv32_uart0_printf("HPM05.low (I-issue waits) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } - if (hpm_num > 3) { neorv32_uart0_printf("HPM06.low (ALU waits) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } - if (hpm_num > 4) { neorv32_uart0_printf("HPM07.low (MEM loads) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } - if (hpm_num > 5) { neorv32_uart0_printf("HPM08.low (MEM stores) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } - if (hpm_num > 6) { neorv32_uart0_printf("HPM09.low (MEM wait) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } - if (hpm_num > 7) { neorv32_uart0_printf("HPM10.low (jumps) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } - if (hpm_num > 8) { neorv32_uart0_printf("HPM11.low (cond. branches) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } - if (hpm_num > 9) { neorv32_uart0_printf("HPM12.low (taken branches) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); } - if (hpm_num > 10) { neorv32_uart0_printf("HPM13.low (EXCs + IRQs) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); } - if (hpm_num > 11) { neorv32_uart0_printf("HPM14.low (illegal instr.) = %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER14)); } - - neorv32_uart0_printf("\nHPM demo program completed.\n"); + neorv32_uart0_printf("\nHPM results (low-words only):\n"); + if ((neorv32_cpu_csr_read(CSR_MXISA) & (1 << CSR_MXISA_ZICNTR))) { + neorv32_uart0_printf(" cycle (active clock cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MCYCLE)); + neorv32_uart0_printf(" instret (retired instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MINSTRET)); + } + if (hpm_num > 0) { neorv32_uart0_printf(" HPM03 (compressed instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } + if (hpm_num > 1) { neorv32_uart0_printf(" HPM04 (instr. 
dispatch wait cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } + if (hpm_num > 2) { neorv32_uart0_printf(" HPM05 (ALU wait cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } + if (hpm_num > 3) { neorv32_uart0_printf(" HPM06 (branch instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } + if (hpm_num > 4) { neorv32_uart0_printf(" HPM07 (control flow transfers) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } + if (hpm_num > 5) { neorv32_uart0_printf(" HPM08 (load instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } + if (hpm_num > 6) { neorv32_uart0_printf(" HPM09 (store instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } + if (hpm_num > 7) { neorv32_uart0_printf(" HPM10 (load/store wait cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } + if (hpm_num > 8) { neorv32_uart0_printf(" HPM11 (entered traps) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } + + neorv32_uart0_printf("\nProgram completed.\n"); return 0; } diff --git a/sw/example/processor_check/main.c b/sw/example/processor_check/main.c index d7d7063e1..fa21ceee9 100644 --- a/sw/example/processor_check/main.c +++ b/sw/example/processor_check/main.c @@ -211,18 +211,15 @@ int main() { if (num_hpm_cnts_global != 0) { cnt_test++; - neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_CIR); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_IF); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_WAIT_MC); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_LOAD); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_STORE); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_WAIT_LS); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_JUMP); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_BRANCH); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TBRANCH); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_TRAP); - neorv32_cpu_csr_write(CSR_MHPMCOUNTER14, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT14, 1 << HPMCNT_EVENT_ILLEGAL); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_COMPR); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_DIS); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_ALU); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_BRANCH); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_BRANCHED); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_LOAD); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_STORE); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); 
neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_WAIT_LSU); + neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_TRAP); // make sure there was no exception if (neorv32_cpu_csr_read(CSR_MCAUSE) == mcause_never_c) { @@ -647,7 +644,7 @@ int main() { // disable machine-mode interrupts neorv32_cpu_csr_clr(CSR_MSTATUS, 1 << CSR_MSTATUS_MIE); - tmp_a = trap_cnt; // current amount of illegal instruction exception + tmp_a = trap_cnt; // current number of traps { asm volatile (".align 4"); @@ -676,7 +673,7 @@ int main() { tmp_a += 10; } - tmp_b = trap_cnt; // number of traps we have seen + tmp_b = trap_cnt; // number of traps we have seen here if ((neorv32_cpu_csr_read(CSR_MCAUSE) == TRAP_CODE_I_ILLEGAL) && // illegal instruction exception (neorv32_cpu_csr_read(CSR_MTINST) == 0xfe002fe3) && // instruction word of last illegal instruction @@ -2203,22 +2200,19 @@ int main() { if (neorv32_cpu_csr_read(CSR_MXISA) & (1 << CSR_MXISA_ZIHPM)) { PRINT_STANDARD( "\n\nHPMs:\n" - "#00 Instr. : %u\n" - "#02 Clocks : %u\n" - "#03 C instr. : %u\n" - "#04 IF wait : %u\n" - "#05 II wait : %u\n" - "#06 ALU wait : %u\n" - "#07 MEM LD : %u\n" - "#08 MEM ST : %u\n" - "#09 MEM wait : %u\n" - "#10 Jumps : %u\n" - "#11 Branches : %u\n" - "#12 >taken : %u\n" - "#13 Traps : %u\n" - "#14 Illegals : %u\n", - neorv32_cpu_csr_read(CSR_INSTRET), + "#00 clock cycles : %u\n" + "#02 instructions : %u\n" + "#03 compr. instr. : %u\n" + "#04 DISP waits : %u\n" + "#05 ALU waits : %u\n" + "#06 branch instr. : %u\n" + "#07 ctrl flow tr. : %u\n" + "#08 MEM loads : %u\n" + "#09 MEM stores : %u\n" + "#10 MEM waits : %u\n" + "#11 traps : %u\n", neorv32_cpu_csr_read(CSR_CYCLE), + neorv32_cpu_csr_read(CSR_INSTRET), neorv32_cpu_csr_read(CSR_MHPMCOUNTER3), neorv32_cpu_csr_read(CSR_MHPMCOUNTER4), neorv32_cpu_csr_read(CSR_MHPMCOUNTER5), @@ -2227,10 +2221,7 @@ int main() { neorv32_cpu_csr_read(CSR_MHPMCOUNTER8), neorv32_cpu_csr_read(CSR_MHPMCOUNTER9), neorv32_cpu_csr_read(CSR_MHPMCOUNTER10), - neorv32_cpu_csr_read(CSR_MHPMCOUNTER11), - neorv32_cpu_csr_read(CSR_MHPMCOUNTER12), - neorv32_cpu_csr_read(CSR_MHPMCOUNTER13), - neorv32_cpu_csr_read(CSR_MHPMCOUNTER14) + neorv32_cpu_csr_read(CSR_MHPMCOUNTER11) ); } diff --git a/sw/lib/include/neorv32_cpu_csr.h b/sw/lib/include/neorv32_cpu_csr.h index 113463bfb..42e443373 100644 --- a/sw/lib/include/neorv32_cpu_csr.h +++ b/sw/lib/include/neorv32_cpu_csr.h @@ -398,23 +398,18 @@ enum NEORV32_CSR_XISA_enum { * CPU mhpmevent hardware performance monitor events **************************************************************************/ enum NEORV32_HPMCNT_EVENT_enum { - HPMCNT_EVENT_CY = 0, /**< CPU mhpmevent CSR (0): Active cycle */ - HPMCNT_EVENT_IR = 2, /**< CPU mhpmevent CSR (2): Retired instruction */ - - HPMCNT_EVENT_CIR = 3, /**< CPU mhpmevent CSR (3): Retired compressed instruction */ - HPMCNT_EVENT_WAIT_IF = 4, /**< CPU mhpmevent CSR (4): Instruction fetch memory wait cycle */ - HPMCNT_EVENT_WAIT_II = 5, /**< CPU mhpmevent CSR (5): Instruction issue wait cycle */ - HPMCNT_EVENT_WAIT_MC = 6, /**< CPU mhpmevent CSR (6): Multi-cycle ALU-operation wait cycle */ - HPMCNT_EVENT_LOAD = 7, /**< CPU mhpmevent CSR (7): Load operation */ - HPMCNT_EVENT_STORE = 8, /**< CPU mhpmevent CSR (8): Store operation */ - HPMCNT_EVENT_WAIT_LS = 9, /**< CPU mhpmevent CSR (9): Load/store memory wait cycle */ - - HPMCNT_EVENT_JUMP = 10, /**< CPU mhpmevent CSR (10): Unconditional jump */ - HPMCNT_EVENT_BRANCH = 11, /**< CPU mhpmevent CSR (11): Conditional branch (taken or 
not taken) */ - HPMCNT_EVENT_TBRANCH = 12, /**< CPU mhpmevent CSR (12): Conditional taken branch */ - - HPMCNT_EVENT_TRAP = 13, /**< CPU mhpmevent CSR (13): Entered trap */ - HPMCNT_EVENT_ILLEGAL = 14 /**< CPU mhpmevent CSR (14): Illegal instruction exception */ + HPMCNT_EVENT_CY = 0, /**< CPU mhpmevent CSR (0): Active cycle */ + HPMCNT_EVENT_TM = 1, /**< CPU mhpmevent CSR (1): Reserved */ + HPMCNT_EVENT_IR = 2, /**< CPU mhpmevent CSR (2): Retired instruction */ + HPMCNT_EVENT_COMPR = 3, /**< CPU mhpmevent CSR (3): Executed compressed instruction */ + HPMCNT_EVENT_WAIT_DIS = 4, /**< CPU mhpmevent CSR (4): Instruction dispatch wait cycle */ + HPMCNT_EVENT_WAIT_ALU = 5, /**< CPU mhpmevent CSR (5): Multi-cycle ALU co-processor wait cycle */ + HPMCNT_EVENT_BRANCH = 6, /**< CPU mhpmevent CSR (6): Executed branch instruction */ + HPMCNT_EVENT_BRANCHED = 7, /**< CPU mhpmevent CSR (7): Control flow transfer */ + HPMCNT_EVENT_LOAD = 8, /**< CPU mhpmevent CSR (8): Executed load operation */ + HPMCNT_EVENT_STORE = 9, /**< CPU mhpmevent CSR (9): Executed store operation */ + HPMCNT_EVENT_WAIT_LSU = 10, /**< CPU mhpmevent CSR (10): Load-store unit memory wait cycle */ + HPMCNT_EVENT_TRAP = 11 /**< CPU mhpmevent CSR (11): Entered trap */ };
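
For quick reference, below is a minimal usage sketch (not part of the patch) of the reworked event layout: it programs mhpmcounter3 to count the new HPMCNT_EVENT_BRANCHED event and prints the result. It only uses identifiers that appear in this diff, plus the <neorv32.h> umbrella header and the CSR_MCOUNTINHIBIT index, which are assumed to be provided by the NEORV32 software framework.

// minimal sketch, assuming <neorv32.h> and CSR_MCOUNTINHIBIT from the NEORV32 software framework
#include <neorv32.h>

void hpm_branched_sketch(void) {

  if (neorv32_cpu_hpm_get_num_counters() == 0) {
    return; // no HPM counters available
  }

  // halt all counters while configuring (mcountinhibit CSR, assumed to be implemented)
  neorv32_cpu_csr_write(CSR_MCOUNTINHIBIT, 0xffffffffU);

  // clear mhpmcounter3 (low and high words) and select event bit 7 = HPMCNT_EVENT_BRANCHED
  neorv32_cpu_csr_write(CSR_MHPMCOUNTER3,  0);
  neorv32_cpu_csr_write(CSR_MHPMCOUNTER3H, 0);
  neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_BRANCHED);

  // re-enable all counters and run the code of interest
  neorv32_cpu_csr_write(CSR_MCOUNTINHIBIT, 0);
  // ... workload under test ...

  // read back the low word of the counter
  neorv32_uart0_printf("control flow transfers: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3));
}

Several event bits can also be OR-ed into the same mhpmevent CSR; the counter then increments whenever any of the selected events fires, as noted in the demo_hpm comments above.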