Skip to content

Commit

Permalink
✨ [sw] emulate missing AMO instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
stnolting committed Jul 6, 2024
1 parent 9a03baa commit 5324a7f
Showing 1 changed file with 75 additions and 23 deletions.
98 changes: 75 additions & 23 deletions sw/example/atomic_test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
/** UART BAUD rate */
#define BAUD_RATE (19200)
//** Number of test cases for each instruction */
#define NUM_TEST_CASES (10000)
#define NUM_TEST_CASES (1000)
//** Silent mode (only show actual errors when != 0) */
#define SILENT_MODE (1)

Expand All @@ -38,14 +38,64 @@ volatile uint32_t amo_var;


/**********************************************************************//**
* MIN/MAX helpers.
* Emulate atomic memory operation.
*
* @note This is a RTE "second-level" trap handler.
**************************************************************************/
/**@{*/
static inline int32_t MAX(int32_t a, int32_t b) { return((a) > (b) ? a : b); }
static inline int32_t MIN(int32_t a, int32_t b) { return((a) < (b) ? a : b); }
static inline int32_t MAXU(uint32_t a, uint32_t b) { return((a) > (b) ? a : b); }
static inline int32_t MINU(uint32_t a, uint32_t b) { return((a) < (b) ? a : b); }
/**@}*/
void trap_handler_emulate_amo(void) {

uint32_t inst = neorv32_cpu_csr_read(CSR_MTINST);

// decompose I-type instruction
uint32_t opcode = (inst >> 0) & 0x07f;
uint32_t rd_addr = (inst >> 7) & 0x01f;
uint32_t funct3 = (inst >> 12) & 0x003;
uint32_t rs1_addr = (inst >> 15) & 0x01f;
uint32_t rs2_addr = (inst >> 20) & 0x01f;
uint32_t funct5 = (inst >> 27) & 0x01f;

// set opcode bit 1 as the instruction word might be transformed (de-compressed)
opcode |= 1 << 1;

#if 0
neorv32_uart0_printf("\n<< EMULATING >>\n");
neorv32_uart0_printf(" opcode: 0x%x\n", opcode);
neorv32_uart0_printf(" rd_addr: %u\n", rd_addr);
neorv32_uart0_printf(" funct3: %u\n", funct3);
neorv32_uart0_printf(" rs1_addr: %u\n", rs1_addr);
neorv32_uart0_printf(" rs2_addr: %u\n", rs2_addr);
neorv32_uart0_printf(" funct5: 0x%x\n", funct5);
neorv32_uart0_printf("<< /EMULATING >>\n\n");
#endif

// emulate if valid A operation and A ISA extension is available
if ((opcode == 0b0101111) && (funct3 == 0b010) && (neorv32_cpu_csr_read(CSR_MISA) & (1 << 0))) {
// get operands from main's context
uint32_t rs1 = neorv32_rte_context_get(rs1_addr);
uint32_t rs2 = neorv32_rte_context_get(rs2_addr);
uint32_t rd = 0, valid = 0;
// emulated functions
switch (funct5) {
case 0b00001: rd = neorv32_cpu_amoswapw(rs1, rs2); valid = 1; break; // amoswap.w
case 0b00000: rd = neorv32_cpu_amoaddw( rs1, rs2); valid = 1; break; // amoadd.w
case 0b00100: rd = neorv32_cpu_amoxorw( rs1, rs2); valid = 1; break; // amoxor.w
case 0b01100: rd = neorv32_cpu_amoandw( rs1, rs2); valid = 1; break; // amoand.w
case 0b01000: rd = neorv32_cpu_amoorw( rs1, rs2); valid = 1; break; // amoor.w
case 0b10000: rd = neorv32_cpu_amominw( rs1, rs2); valid = 1; break; // amomin.w
case 0b10100: rd = neorv32_cpu_amomaxw( rs1, rs2); valid = 1; break; // amomax.w
case 0b11000: rd = neorv32_cpu_amominuw(rs1, rs2); valid = 1; break; // amominu.w
case 0b11100: rd = neorv32_cpu_amomaxuw(rs1, rs2); valid = 1; break; // amomaxu.w
default: neorv32_rte_debug_handler(); break; // use the RTE debug handler for any other misaligned load exception
}
if (valid) {
// write result back to main's context
neorv32_rte_context_put(rd_addr, rd);
}
}
else {
neorv32_rte_debug_handler();
}
}


/**********************************************************************//**
Expand All @@ -61,6 +111,8 @@ int main() {

// capture all exceptions and give debug info via UART
neorv32_rte_setup();
// install trap handler for "unaligned load address" exception
neorv32_rte_handler_install(RTE_TRAP_I_ILLEGAL, trap_handler_emulate_amo);

// setup UART0 at default baud rate, no interrupts
neorv32_uart0_setup(BAUD_RATE, 0);
Expand Down Expand Up @@ -96,7 +148,7 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amoswapw(amo_addr, amo_var_update);
asm volatile ("amoswap.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = amo_var_update;

Expand All @@ -114,7 +166,7 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amoaddw(amo_addr, amo_var_update);
asm volatile ("amoadd.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = amo_var_old + amo_var_update;

Expand All @@ -132,7 +184,7 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amoandw(amo_addr, amo_var_update);
asm volatile ("amoand.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = amo_var_old & amo_var_update;

Expand All @@ -150,7 +202,7 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amoorw(amo_addr, amo_var_update);
asm volatile ("amoor.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = amo_var_old | amo_var_update;

Expand All @@ -168,7 +220,7 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amoxorw(amo_addr, amo_var_update);
asm volatile ("amoxor.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = amo_var_old ^ amo_var_update;

Expand All @@ -186,9 +238,9 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amomaxw(amo_addr, (int32_t)amo_var_update);
asm volatile ("amomax.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = (uint32_t)MAX((int32_t)amo_var_old, (int32_t)amo_var_update);
amo_var_new = (uint32_t)neorv32_aux_max((int32_t)amo_var_old, (int32_t)amo_var_update);

err_cnt += check_result(i, amo_var_old, amo_var_pre, amo_var_new, amo_var);
}
Expand All @@ -204,9 +256,9 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amomaxuw(amo_addr, amo_var_update);
asm volatile ("amomaxu.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = MAXU(amo_var_old, amo_var_update);
amo_var_new = neorv32_aux_max(amo_var_old, amo_var_update);

err_cnt += check_result(i, amo_var_old, amo_var_pre, amo_var_new, amo_var);
}
Expand All @@ -222,9 +274,9 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amominw(amo_addr, (int32_t)amo_var_update);
asm volatile ("amomin.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = (uint32_t)MIN((int32_t)amo_var_old, (int32_t)amo_var_update);
amo_var_new = (uint32_t)neorv32_aux_min((int32_t)amo_var_old, (int32_t)amo_var_update);

err_cnt += check_result(i, amo_var_old, amo_var_pre, amo_var_new, amo_var);
}
Expand All @@ -240,9 +292,9 @@ int main() {

amo_var = amo_var_old;
asm volatile ("fence");
amo_var_pre = neorv32_cpu_amominuw(amo_addr, amo_var_update);
asm volatile ("amominu.w %[dest], %[data], 0(%[addr])" : [dest] "=r" (amo_var_pre) : [data] "r" (amo_var_update), [addr] "r" (amo_addr));
asm volatile ("fence");
amo_var_new = MINU(amo_var_old, amo_var_update);
amo_var_new = neorv32_aux_min(amo_var_old, amo_var_update);

err_cnt += check_result(i, amo_var_old, amo_var_pre, amo_var_new, amo_var);
}
Expand Down Expand Up @@ -273,12 +325,12 @@ int main() {
uint32_t check_result(uint32_t num, uint32_t amo_var_old, uint32_t amo_var_pre, uint32_t amo_var_new, uint32_t amo_var) {

#if (SILENT_MODE == 0)
neorv32_uart0_printf("%u: MEM_INITIAL[addr] = 0x%x vs. MEM_PRE[addr] = 0x%x | MEM_NEW_ref[addr] = 0x%x vs. MEM_NEW[addr] = 0x%x, ", num, amo_var_old, amo_var_pre, amo_var_new, amo_var);
neorv32_uart0_printf("%u: MEM_INITIAL[addr] = 0x%x vs. MEM_PRE[addr] = 0x%x & MEM_NEW_ref[addr] = 0x%x vs. MEM_NEW[addr] = 0x%x, ", num, amo_var_old, amo_var_pre, amo_var_new, amo_var);
#endif

if ((amo_var_old != amo_var_pre) || (amo_var_new != amo_var)) {
#if (SILENT_MODE != 0)
neorv32_uart0_printf("%u: MEM_INITIAL[addr] = 0x%x vs. MEM_PRE[addr] = 0x%x | MEM_NEW_ref[addr] = 0x%x vs. MEM_NEW[addr] = 0x%x, ", num, amo_var_old, amo_var_pre, amo_var_new, amo_var);
neorv32_uart0_printf("%u: MEM_INITIAL[addr] = 0x%x vs. MEM_PRE[addr] = 0x%x & MEM_NEW_ref[addr] = 0x%x vs. MEM_NEW[addr] = 0x%x, ", num, amo_var_old, amo_var_pre, amo_var_new, amo_var);
#endif
neorv32_uart0_printf("%c[1m[FAILED]%c[0m\n", 27, 27);
return 1;
Expand Down

2 comments on commit 5324a7f

@NikLeberg
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uuuh, I like it! Great work!

@stnolting
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! 😉
I'm planning to integrate trap-based into the runtime environment to provide support for all "not-fully" supported ISA extensions. However, I'm not completely satisfied yet. What if there is another trap inside the trap handler, for example an unaligned memory access? This is something that should be fixed (somehow).

Please sign in to comment.