diff --git a/docs/FPGA-ACCEL.md b/docs/FPGA-ACCEL.md index 4d3ddf8..3eb090c 100755 --- a/docs/FPGA-ACCEL.md +++ b/docs/FPGA-ACCEL.md @@ -1,6 +1,8 @@ # IOTA FPGA-accelerated solution for Dcurl +This document is divided into two parts. The first part describes the motivation and contribution, and the second part roughly describes how the IOTA FPGA-accelerated solution is integrated into dcurl. + dcurl supports IOTA FPGA-accelerated solutions to improve PoW performance. PoW calculation time for MWM=14 is between 0.001 and 0.8 second and 0.14 second in average and The time for MWM=15 is between 0.01 and 2 second and 0.42 second in average. Currently, it is experimented and verified on Arrow Sockit board and Intel FPGA DE10-Nano board. We reuse the Lampa Lab-provided FPGA-accelerated solution. Here is a brief summary of the tasks Lampa Lab have done: @@ -14,3 +16,9 @@ Here is a brief summary of the tasks we have done: * Resynthesize the POW hardware accelerator for Arrow Sockit board and flash it into the board * Integrate the IOTA PoW hardware accelerator into dcurl's implementation interface * Test and verify it + +For the IOTA FPGA-accelerated integration into dcurl, we assume that one dcurl maps to one FPGA board, unlike the GPU-accelerated solution, which considers a multi-GPU scenario. 
+ +Here is a simple summary of integrating FPGA-accelerated solution into dcurl: +* Only one thread is allowed to do FPGA PoW +* No multi-thread management inside the FPGA Implementation Context is implemented diff --git a/src/pow_fpga_accel.c b/src/pow_fpga_accel.c index 06e4d62..771e9b8 100644 --- a/src/pow_fpga_accel.c +++ b/src/pow_fpga_accel.c @@ -7,19 +7,10 @@ #include "pow_fpga_accel.h" #include -#include #include #include "implcontext.h" #include "trinary.h" -#define HPS_TO_FPGA_BASE 0xC0000000 -#define HPS_TO_FPGA_SPAN 0x0020000 -#define HASH_CNT_REG_OFFSET 4 -#define TICK_CNT_LOW_REG_OFFSET 5 -#define TICK_CNT_HI_REG_OFFSET 6 -#define MWM_MASK_REG_OFFSET 3 -#define CPOW_BASE 0 - /* Set FPGA configuration for device files */ #define DEV_CTRL_FPGA "/dev/cpow-ctrl" #define DEV_IDATA_FPGA "/dev/cpow-idata" @@ -33,10 +24,6 @@ S[3] = (I >> 24) & 0xff; \ } -static int devmem_fd; -static void *fpga_regs_map; -static uint32_t *cpow_map; - static bool PoWFPGAAccel(void *pow_ctx) { PoW_FPGA_Accel_Context *ctx = (PoW_FPGA_Accel_Context *) pow_ctx; @@ -95,119 +82,66 @@ static bool PoWFPGAAccel(void *pow_ctx) static bool PoWFPGAAccel_Context_Initialize(ImplContext *impl_ctx) { - int i = 0; - devmem_fd = 0; - fpga_regs_map = 0; - cpow_map = 0; - - PoW_FPGA_Accel_Context *ctx = (PoW_FPGA_Accel_Context *) malloc( - sizeof(PoW_FPGA_Accel_Context) * impl_ctx->num_max_thread); + PoW_FPGA_Accel_Context *ctx = + (PoW_FPGA_Accel_Context *) malloc(sizeof(PoW_FPGA_Accel_Context)); if (!ctx) goto fail_to_malloc; - for (i = 0; i < impl_ctx->num_max_thread; i++) { - ctx[i].ctrl_fd = open(DEV_CTRL_FPGA, O_RDWR); - if (ctx[i].ctrl_fd < 0) { - perror("cpow-ctrl open fail"); - goto fail_to_open_ctrl; - } - ctx[i].in_fd = open(DEV_IDATA_FPGA, O_RDWR); - if (ctx[i].in_fd < 0) { - perror("cpow-idata open fail"); - goto fail_to_open_idata; - } - ctx[i].out_fd = open(DEV_ODATA_FPGA, O_RDWR); - if (ctx[i].out_fd < 0) { - perror("cpow-odata open fail"); - goto fail_to_open_odata; - } - 
impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; + ctx->ctrl_fd = open(DEV_CTRL_FPGA, O_RDWR); + if (ctx->ctrl_fd < 0) { + perror("cpow-ctrl open fail"); + goto fail_to_open_ctrl; } - impl_ctx->context = ctx; - pthread_mutex_init(&impl_ctx->lock, NULL); - - devmem_fd = open("/dev/mem", O_RDWR | O_SYNC); - if (devmem_fd < 0) { - perror("devmem open"); - goto fail_to_open_memopen; + ctx->in_fd = open(DEV_IDATA_FPGA, O_RDWR); + if (ctx->in_fd < 0) { + perror("cpow-idata open fail"); + goto fail_to_open_idata; } - - fpga_regs_map = - (uint32_t *) mmap(NULL, HPS_TO_FPGA_SPAN, PROT_READ | PROT_WRITE, - MAP_SHARED, devmem_fd, HPS_TO_FPGA_BASE); - if (fpga_regs_map == MAP_FAILED) { - perror("devmem mmap"); - goto fail_to_open_memmap; + ctx->out_fd = open(DEV_ODATA_FPGA, O_RDWR); + if (ctx->out_fd < 0) { + perror("cpow-odata open fail"); + goto fail_to_open_odata; } - cpow_map = (uint32_t *) (fpga_regs_map + CPOW_BASE); + impl_ctx->context = ctx; + pthread_mutex_init(&impl_ctx->lock, NULL); return true; -fail_to_open_memmap: - close(devmem_fd); -fail_to_open_memopen: - close(ctx[i].out_fd); fail_to_open_odata: - close(ctx[i].in_fd); + close(ctx->in_fd); fail_to_open_idata: - close(ctx[i].ctrl_fd); + close(ctx->ctrl_fd); fail_to_open_ctrl: fail_to_malloc: - for (int j = i - 1; j > 0; j--) { - close(ctx[j].in_fd); - close(ctx[j].out_fd); - close(ctx[j].ctrl_fd); - } return false; } static void PoWFPGAAccel_Context_Destroy(ImplContext *impl_ctx) { PoW_FPGA_Accel_Context *ctx = (PoW_FPGA_Accel_Context *) impl_ctx->context; - for (int i = 0; i < impl_ctx->num_max_thread; i++) { - close(ctx[i].in_fd); - close(ctx[i].out_fd); - close(ctx[i].ctrl_fd); - } - free(ctx); - int result = munmap(fpga_regs_map, HPS_TO_FPGA_SPAN); - if (result < 0) { - perror("devmem munmap"); - } + close(ctx->in_fd); + close(ctx->out_fd); + close(ctx->ctrl_fd); - close(devmem_fd); + free(ctx); } static void *PoWFPGAAccel_getPoWContext(ImplContext *impl_ctx, int8_t *trytes, int mwm) { - 
pthread_mutex_lock(&impl_ctx->lock); - for (int i = 0; i < impl_ctx->num_max_thread; i++) { - if (impl_ctx->bitmap & (0x1 << i)) { - impl_ctx->bitmap &= ~(0x1 << i); - pthread_mutex_unlock(&impl_ctx->lock); - PoW_FPGA_Accel_Context *ctx = - impl_ctx->context + sizeof(PoW_FPGA_Accel_Context) * i; - memcpy(ctx->input_trytes, trytes, (transactionTrinarySize) / 3); - ctx->mwm = mwm; - ctx->indexOfContext = i; - return ctx; - } - } + PoW_FPGA_Accel_Context *ctx = impl_ctx->context; + memcpy(ctx->input_trytes, trytes, (transactionTrinarySize) / 3); + ctx->mwm = mwm; + ctx->indexOfContext = 0; - pthread_mutex_unlock(&impl_ctx->lock); - return NULL; /* It should not happen */ + return ctx; } static bool PoWFPGAAccel_freePoWContext(ImplContext *impl_ctx, void *pow_ctx) { - pthread_mutex_lock(&impl_ctx->lock); - impl_ctx->bitmap |= 0x1 - << ((PoW_FPGA_Accel_Context *) pow_ctx)->indexOfContext; - pthread_mutex_unlock(&impl_ctx->lock); return true; }