Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate FPGA-accelerated PoW #50

Merged
merged 50 commits into from
Aug 22, 2018
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
d8f0073
Begin at code of Lampa Lab
ajblane Jul 4, 2018
2154f76
Implement related functions for using the accelerator
ajblane Jul 4, 2018
d2bf35d
Add the BUILD_FPGA_LAMPALAB option into Makefile
ajblane Jul 4, 2018
6c3f646
Add the accelerator function into dcurl
ajblane Aug 7, 2018
c47bd82
Test FPGA PoW of Lampa Lab
ajblane Jul 13, 2018
14dea4b
Fix the bug of returning local variable of function
ajblane Jul 13, 2018
aea24e3
Fix the bug by adding type translation
ajblane Jul 26, 2018
01ab815
Typo for comments
ajblane Jul 26, 2018
60503e9
Use clang-format for h files and c files
ajblane Aug 4, 2018
98af4d4
Rename the functions but do it not well
ajblane Jul 26, 2018
79ccfe3
Do not show print info when doing PoW
ajblane Jul 26, 2018
269700f
Rewrite name scheme
ajblane Jul 26, 2018
a370a98
Add information of building FPGA accelerator to README
ajblane Jul 26, 2018
48f419c
Use existing functions and remove useless files
ajblane Jul 26, 2018
2eef531
Type consistency for all implemented PoW functions
ajblane Jul 26, 2018
a12c471
Correct spelling errors
ajblane Jul 26, 2018
71a016f
Skip kernel message
ajblane Jul 27, 2018
1e83b29
Make them with static
ajblane Aug 4, 2018
38b346f
Drop the useless message
ajblane Aug 4, 2018
5674857
Typo for the comment
ajblane Aug 4, 2018
1da48e7
Rewrite consistent naming
ajblane Aug 4, 2018
5b67f55
Drop extra blank lines
ajblane Aug 4, 2018
1d5dc44
Advoid terminating program when fail PoW init
ajblane Aug 4, 2018
4587adb
Declare it with type consistency and rename it
ajblane Aug 5, 2018
2433b9c
Modify README
ajblane Aug 5, 2018
252c1f1
Make it naming consistency
ajblane Aug 5, 2018
1086c5a
Drop unnecessary header files included
ajblane Aug 5, 2018
0537f5c
Modify copyright notice and remove blank line
ajblane Aug 7, 2018
4f33b60
Change i = i + 1 to i++
ajblane Aug 7, 2018
9f44c8a
Rename the file and set CFLAGS in Makefile
ajblane Aug 7, 2018
31240dc
Advoid terminating the program when destroy
ajblane Aug 4, 2018
cd99478
Use malloc for output memory of PoW
ajblane Aug 8, 2018
913afa0
Move the unnecessary global variable
ajblane Aug 8, 2018
c7bbf07
Validate resources for the outputs
ajblane Aug 8, 2018
02a9780
Do not terminate the program when destroy fails
ajblane Aug 9, 2018
3b4f93a
Move header inclusions to proper regions
ajblane Aug 9, 2018
df8eedf
Set FPGA configuration for device files
ajblane Aug 9, 2018
f60c26a
Use macros from constants.h
ajblane Aug 10, 2018
2837ed9
Use goto to handle exception
ajblane Aug 13, 2018
43a865f
Rename macros
ajblane Aug 13, 2018
97be9ec
Maintain the coding style
ajblane Aug 13, 2018
a0f78de
Use low-level I/O
ajblane Aug 17, 2018
20949ae
Use memcpy and remove not useful code
ajblane Aug 17, 2018
2002c1e
Rename labels and use clang-format
ajblane Aug 17, 2018
19ea794
Use assert
ajblane Aug 17, 2018
6c98c16
Solve conflicts
ajblane Aug 20, 2018
119e67d
Modify it to the new interface #59
ajblane Aug 20, 2018
fafa723
Drop fpga-accel.mk
ajblane Aug 22, 2018
f220b24
Modify README and add FPGA-ACCEL.md
ajblane Aug 22, 2018
00711c8
Modify FPGA-ACCEL.md
ajblane Aug 22, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ ifeq ("$(BUILD_GPU)","1")
include mk/opencl.mk
endif

ifeq ("$(BUILD_FPGA_ACCEL)","1")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since file fpga-accel.mk is empty, you don't have to include at the moment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you fix?

CFLAGS += -DENABLE_FPGA_ACCEL
endif

ifeq ("$(BUILD_JNI)","1")
include mk/java.mk
endif
Expand Down Expand Up @@ -71,6 +75,10 @@ ifeq ("$(BUILD_COMPAT)", "1")
TESTS += ccurl-multi_pow
endif

ifeq ("$(BUILD_FPGA_ACCEL)","1")
TESTS += pow_fpga_accel
endif

TESTS := $(addprefix $(OUT)/test-, $(TESTS))

LIBS = libdcurl.so
Expand Down Expand Up @@ -112,6 +120,11 @@ OBJS += \
compat-ccurl.o
endif

ifeq ("$(BUILD_FPGA_ACCEL)","1")
OBJS += \
pow_fpga_accel.o
endif

OBJS := $(addprefix $(OUT)/, $(OBJS))

$(OUT)/test-%.o: tests/test-%.c
Expand Down
27 changes: 25 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
[![Build Status](https://travis-ci.org/DLTcollab/dcurl.svg?branch=dev)](https://travis-ci.org/DLTcollab/dcurl)
![Supported IRI version](https://img.shields.io/badge/Supported%20IRI%20Version-1.5.3-brightgreen.svg)

Hardware-accelerated implementation for IOTA PearlDiver, which utilizes multi-threaded SIMD and GPU.
Hardware-accelerated implementation for IOTA PearlDiver, which utilizes multi-threaded SIMD, FPGA and GPU.

# Introduction
dcurl exploits SIMD instructions on CPU and OpenCL on GPU. Both CPU and GPU accelerations can be
enabled in multi-threaded execution fashion, resulting in much faster proof-of-work (PoW) for IOTA
Reference Implementation (IRI).
Reference Implementation (IRI). Additionally, dcurl also supports the FPGA-accelerated solution further described in docs/FPGA-ACCEL.md

# Warning
* You need to configure paths and flags of OpenCL installation in ```mk/opencl.mk```
* dcurl will automatically configure all the GPU devices on your platform.
* Check JDK installation and set JAVA_HOME if you wish to specify.
* If your platform doesn't support Intel SSE, dcurl would be compiled with naive implementation.
* For the IOTA hardware accelerator, we integrate [Lampa Lab's Cyclone V FPGA PoW](https://github.com/LampaLab/iota_fpga) into dcurl. Lampa Lab provides soc_system.rbf only for the DE10-nano board. To use the Arrow SoCKit board, you need to synthesize soc_system.rbf yourself, or download [this RBF file](https://github.com/ajblane/dcurl/releases/tag/v1.0-SoCKit) from our release. Moreover, you need to download the [Lampa Lab-provided Linux image](https://github.com/LampaLab/iota_fpga/releases/tag/v0.1) and flash it onto the micro-SD card; the root password is 123456. Finally, you also need to download dcurl into the root directory.

# Build Instructions
* dcurl allows various combinations of build configurations to fit final use scenarios.
Expand All @@ -24,6 +25,7 @@ Reference Implementation (IRI).
- ``BUILD_JNI``: build a shared library for IRI. The build system would generate JNI header file
downloading from [latest JAVA source](https://github.com/chenwei-tw/iri/tree/feat/new_pow_interface).
- ``BUILD_COMPAT``: build extra cCurl compatible interface.
- ``BUILD_FPGA_ACCEL``: build the interface interacting with the Cyclone V FPGA based accelerator. Verified on DE10-nano board and Arrow SoCKit board.
* Alternatively, you can specify conditional build as following:
```shell
$ make BUILD_GPU=0 BUILD_JNI=1 BUILD_AVX=1
Expand Down Expand Up @@ -68,6 +70,27 @@ $ make BUILD_AVX=1 check
[ Verified ]
```

* Test with Arrow SoCKit board
```shell
root@lampa:~# sh init_curl_pow.sh
root@lampa:~# cd dcurl
root@lampa:~/dcurl# make BUILD_FPGA_ACCEL=1 check
```

* Expected Results
```
*** Validating build/test-trinary ***
[ Verified ]
*** Validating build/test-curl ***
[ Verified ]
*** Validating build/test-pow_c ***
[ Verified ]
*** Validating build/test-multi_pow_cpu ***
[ Verified ]
*** Validating build/test-pow_fpga_accel ***
[ Verified ]
```

# Tweaks
* Number of threads to find nonce in CPU
* ```$ export DCURL_NUM_CPU=26```
Expand Down
22 changes: 22 additions & 0 deletions docs/FPGA-ACCEL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

IOTA FPGA-accelerated solution for dcurl
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use # IOTA FPGA-accelerated solution for dcurl for consistent Markdown style.

----------------------------------------

dcurl supports IOTA FPGA-accelerated solutions to improve PoW performance. PoW calculation time for MWM=14 is between 0.001 and 0.8 seconds (0.14 seconds on average), and the time for MWM=15 is between 0.01 and 2 seconds (0.42 seconds on average). Currently, it has been tested and verified on the Arrow SoCKit board and the Intel FPGA DE10-Nano board. We reuse the FPGA-accelerated solution provided by Lampa Lab.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually call it dcurl in lowercase.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: "verfied"


Here is a brief summary of the tasks Lampa Lab have done:
* Use Verilog to implement Curl & POW accelerators
* Use System Verilog and UVM to verify the accelerators
* Synthesize Curl & POW hardware accelerators for Intel FPGA DE10-Nano board and flash it into the board
* Write Linux drivers in Golden System Reference Design for Curl & POW hardware accelerators and verify them

More information: [LampaLab/iota_fpga](https://github.com/LampaLab/iota_fpga)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace "You want to known it much more and further look at" with "More information: ".


Here is a brief summary of the tasks we have done:
* Resynthesize the POW hardware accelerator for Arrow Sockit board and flash it into the board
* Integrate the IOTA PoW hardware accelerator into dcurl's implementation interface
* Test and verify it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't append trailing space lines.





3 changes: 3 additions & 0 deletions mk/defs.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ BUILD_JNI ?= 0

# Build cCurl compatible interface
BUILD_COMPAT ?= 0

# Build FPGA backend or not
BUILD_FPGA_ACCEL ?= 0
11 changes: 11 additions & 0 deletions src/dcurl.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
#if defined(ENABLE_OPENCL)
#include "pow_cl.h"
#endif
#if defined(ENABLE_FPGA_ACCEL)
#include "pow_fpga_accel.h"
#endif
#include "trinary.h"
#include "implcontext.h"
#if defined(ENABLE_AVX)
Expand Down Expand Up @@ -51,6 +54,10 @@ extern ImplContext PoWC_Context;
extern ImplContext PoWCL_Context;
#endif

#if defined(ENABLE_FPGA_ACCEL)
extern ImplContext PoWFPGAAccel_Context;
#endif

bool dcurl_init()
{
bool ret = true;
Expand All @@ -67,6 +74,10 @@ bool dcurl_init()
ret &= registerImplContext(&PoWCL_Context);
#endif

#if defined(ENABLE_FPGA_ACCEL)
ret &= registerImplContext(&PoWFPGAAccel_Context);
#endif

#ifdef __APPLE__
notify = dispatch_semaphore_create(0);
#else
Expand Down
232 changes: 232 additions & 0 deletions src/pow_fpga_accel.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/*
* Copyright (C) 2018 dcurl Developers.
* Copyright (c) 2018 Ievgen Korokyi.
* Use of this source code is governed by MIT license that can be
* found in the LICENSE file.
*/

#include "pow_fpga_accel.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "implcontext.h"
#include "trinary.h"

/*
 * Offset and length handed to mmap() on /dev/mem when the context is
 * initialized: HPS_TO_FPGA_SPAN bytes are mapped starting at physical
 * offset HPS_TO_FPGA_BASE.
 * NOTE(review): presumably the HPS-to-FPGA bridge window of the Cyclone V
 * SoC -- confirm against the board documentation.
 */
#define HPS_TO_FPGA_BASE 0xC0000000
#define HPS_TO_FPGA_SPAN 0x0020000
/*
 * Register offsets of the accelerator.
 * NOTE(review): none of these four offsets is referenced by the code visible
 * in this file; their unit (bytes vs. 32-bit words) cannot be told from here.
 */
#define HASH_CNT_REG_OFFSET 4
#define TICK_CNT_LOW_REG_OFFSET 5
#define TICK_CNT_HI_REG_OFFSET 6
#define MWM_MASK_REG_OFFSET 3
/* Offset added to the start of the mapped window to obtain cpow_map. */
#define CPOW_BASE 0

/*
 * Device files opened once per PoW context: the control device (written
 * with the MWM, then read back), the input-data device (written with the
 * transaction trits) and the output-data device (read for the nonce trits).
 */
#define DEV_CTRL_FPGA "/dev/cpow-ctrl"
#define DEV_IDATA_FPGA "/dev/cpow-idata"
#define DEV_ODATA_FPGA "/dev/cpow-odata"

/*
 * Store the low 32 bits of I into S[0]..S[3], least significant byte first.
 *
 * I: integer expression; it is evaluated exactly once and converted to
 *    uint32_t, so expressions with low-precedence operators (a | b) and
 *    expressions with side effects are handled correctly.
 * S: array or pointer with room for at least four elements. S is expanded
 *    four times, so it must not have side effects.
 *
 * The do { } while (0) wrapper makes the macro behave as a single statement,
 * which keeps `if (c) INT2STRING(x, s); else ...` well-formed.
 */
#define INT2STRING(I, S)                                    \
    do {                                                    \
        const uint32_t int2string_val_ = (uint32_t) (I);    \
        (S)[0] = int2string_val_ & 0xff;                    \
        (S)[1] = (int2string_val_ >> 8) & 0xff;             \
        (S)[2] = (int2string_val_ >> 16) & 0xff;            \
        (S)[3] = (int2string_val_ >> 24) & 0xff;            \
    } while (0)

/* Descriptor returned by open("/dev/mem") during context initialization. */
static int devmem_fd;
/* Start of the mmap()ed window (HPS_TO_FPGA_SPAN bytes); unmapped on destroy. */
static void *fpga_regs_map;
/*
 * fpga_regs_map advanced by CPOW_BASE.
 * NOTE(review): assigned during initialization but never read by the code
 * visible in this file.
 */
static uint32_t *cpow_map;

/*
 * Run one proof-of-work on the FPGA accelerator.
 *
 * pow_ctx points to a PoW_FPGA_Accel_Context whose input_trytes and mwm
 * have been filled in. The transaction is converted to trits and written to
 * the input-data device, the MWM is written to the control device (4 bytes,
 * least significant first) and 4 bytes are read back from it, then the nonce
 * trits are read from the output-data device. On success output_trytes holds
 * the input trytes up to the nonce offset followed by the nonce trytes.
 *
 * Returns true on success, false if a conversion or any read()/write()
 * fails. Every intermediate Trobject is released on all paths.
 */
static bool PoWFPGAAccel(void *pow_ctx)
{
    PoW_FPGA_Accel_Context *ctx = (PoW_FPGA_Accel_Context *) pow_ctx;
    bool ret = false;

    int8_t fpga_out_nonce_trits[NonceTrinarySize];

    /* Bytes read back from the control device; not interpreted here. */
    char result[4];
    char buf[4];

    Trytes_t *object_trytes = NULL;
    Trits_t *object_trits = NULL;
    Trits_t *object_nonce_trits = NULL;
    Trytes_t *nonce_trytes = NULL;

    object_trytes =
        initTrytes(ctx->input_trytes, (transactionTrinarySize) / 3);
    if (!object_trytes)
        return false;

    object_trits = trits_from_trytes(object_trytes);
    if (!object_trits)
        goto free_object_trytes;

    if (write(ctx->in_fd, (char *) object_trits->data, transactionTrinarySize) <
        0)
        goto free_object_trits;

    INT2STRING(ctx->mwm, buf);
    if (write(ctx->ctrl_fd, buf, sizeof(buf)) < 0)
        goto free_object_trits;
    if (read(ctx->ctrl_fd, result, sizeof(result)) < 0)
        goto free_object_trits;

    if (read(ctx->out_fd, (char *) fpga_out_nonce_trits, NonceTrinarySize) < 0)
        goto free_object_trits;

    object_nonce_trits = initTrits(fpga_out_nonce_trits, NonceTrinarySize);
    if (!object_nonce_trits)
        goto free_object_trits;

    nonce_trytes = trytes_from_trits(object_nonce_trits);
    if (!nonce_trytes)
        goto free_object_nonce_trits;

    /* Keep everything before the nonce, then splice in the new nonce. */
    memcpy(ctx->output_trytes, ctx->input_trytes, (NonceTrinaryOffset) / 3);
    memcpy(ctx->output_trytes + ((NonceTrinaryOffset) / 3), nonce_trytes->data,
           ((transactionTrinarySize) - (NonceTrinaryOffset)) / 3);

    ret = true;

    /* Success falls through the same labels the error paths jump to. */
    freeTrobject(nonce_trytes);
free_object_nonce_trits:
    freeTrobject(object_nonce_trits);
free_object_trits:
    freeTrobject(object_trits);
free_object_trytes:
    freeTrobject(object_trytes);
    return ret;
}

/*
 * Set up the FPGA back end for impl_ctx.
 *
 * Allocates impl_ctx->num_max_thread PoW contexts and, for each one, opens
 * the control, input-data and output-data device files and sets one more
 * low bit in impl_ctx->bitmap. It then stores the array in
 * impl_ctx->context, initializes impl_ctx->lock, opens /dev/mem and maps
 * HPS_TO_FPGA_SPAN bytes at offset HPS_TO_FPGA_BASE.
 *
 * Returns true on success and false on any failure.
 */
static bool PoWFPGAAccel_Context_Initialize(ImplContext *impl_ctx)
{
int i = 0;
devmem_fd = 0;
fpga_regs_map = 0;
cpow_map = 0;

PoW_FPGA_Accel_Context *ctx = (PoW_FPGA_Accel_Context *) malloc(
sizeof(PoW_FPGA_Accel_Context) * impl_ctx->num_max_thread);
if (!ctx)
goto fail_to_malloc;

for (i = 0; i < impl_ctx->num_max_thread; i++) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move int i from Line 49 to the scope in for iteration.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My fault. Skip the previous comment.

ctx[i].ctrl_fd = open(DEV_CTRL_FPGA, O_RDWR);
if (ctx[i].ctrl_fd < 0) {
perror("cpow-ctrl open fail");
goto fail_to_open_ctrl;
}
ctx[i].in_fd = open(DEV_IDATA_FPGA, O_RDWR);
if (ctx[i].in_fd < 0) {
perror("cpow-idata open fail");
goto fail_to_open_idata;
}
ctx[i].out_fd = open(DEV_ODATA_FPGA, O_RDWR);
if (ctx[i].out_fd < 0) {
perror("cpow-odata open fail");
goto fail_to_open_odata;
}
/* One more free-slot bit per successfully opened context. */
impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1;
}
impl_ctx->context = ctx;
pthread_mutex_init(&impl_ctx->lock, NULL);

devmem_fd = open("/dev/mem", O_RDWR | O_SYNC);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "/dev/mem" should also be used as macro.

Copy link
Collaborator Author

@ajblane ajblane Aug 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this device driver don't be changed by developers to access to the system's physical memory. Therefore, I prefer to not use macro.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay.

if (devmem_fd < 0) {
perror("devmem open");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Properly use goto to reduce the lines for exception handling.

goto fail_to_open_memopen;
}

fpga_regs_map =
(uint32_t *) mmap(NULL, HPS_TO_FPGA_SPAN, PROT_READ | PROT_WRITE,
MAP_SHARED, devmem_fd, HPS_TO_FPGA_BASE);
if (fpga_regs_map == MAP_FAILED) {
perror("devmem mmap");
goto fail_to_open_memmap;
}

/* NOTE(review): arithmetic on a void pointer is a GNU extension, not ISO C. */
cpow_map = (uint32_t *) (fpga_regs_map + CPOW_BASE);

return true;

/*
 * Error unwinding: each label releases what was acquired before the
 * failing step and falls through to the labels below it.
 *
 * NOTE(review): when reached from the /dev/mem failures, the loop above has
 * finished and i == impl_ctx->num_max_thread, so the three ctx[i] accesses
 * below read one element past the allocated array.
 * NOTE(review): on those same paths impl_ctx->context and impl_ctx->bitmap
 * have already been set and are not reset before returning false.
 */
fail_to_open_memmap:
close(devmem_fd);
fail_to_open_memopen:
close(ctx[i].out_fd);
fail_to_open_odata:
close(ctx[i].in_fd);
fail_to_open_idata:
close(ctx[i].ctrl_fd);
fail_to_open_ctrl:
fail_to_malloc:
/*
 * NOTE(review): the condition j > 0 stops before index 0, so the
 * descriptors of ctx[0] are not closed here; j >= 0 looks intended.
 * NOTE(review): ctx itself is not passed to free() on any failure path.
 */
for (int j = i - 1; j > 0; j--) {
close(ctx[j].in_fd);
close(ctx[j].out_fd);
close(ctx[j].ctrl_fd);
}
return false;
}

/*
 * Tear down the FPGA back end: close the three device descriptors of every
 * PoW context, free the context array, unmap the register window and close
 * the /dev/mem descriptor. A failing munmap() is reported with perror() and
 * does not stop the remaining cleanup.
 */
static void PoWFPGAAccel_Context_Destroy(ImplContext *impl_ctx)
{
    PoW_FPGA_Accel_Context *slots =
        (PoW_FPGA_Accel_Context *) impl_ctx->context;
    int idx = 0;

    while (idx < impl_ctx->num_max_thread) {
        close(slots[idx].in_fd);
        close(slots[idx].out_fd);
        close(slots[idx].ctrl_fd);
        idx++;
    }
    free(slots);

    if (munmap(fpga_regs_map, HPS_TO_FPGA_SPAN) < 0)
        perror("devmem munmap");

    close(devmem_fd);
}

/*
 * Claim a free PoW context and prepare it for one PoW run.
 *
 * Under impl_ctx->lock, scans impl_ctx->bitmap for the lowest set bit below
 * num_max_thread, clears it to mark that slot as taken and releases the
 * lock. The slot is then loaded with a copy of `trytes`
 * (transactionTrinarySize / 3 bytes), `mwm` and its own index.
 *
 * Returns the claimed context, or NULL when no bit is set.
 */
static void *PoWFPGAAccel_getPoWContext(ImplContext *impl_ctx,
                                        int8_t *trytes,
                                        int mwm)
{
    pthread_mutex_lock(&impl_ctx->lock);
    for (int i = 0; i < impl_ctx->num_max_thread; i++) {
        if (impl_ctx->bitmap & (0x1 << i)) {
            impl_ctx->bitmap &= ~(0x1 << i);
            pthread_mutex_unlock(&impl_ctx->lock);
            /*
             * Index through a typed pointer: impl_ctx->context is void *,
             * and arithmetic on void * is a GNU extension, not ISO C.
             */
            PoW_FPGA_Accel_Context *ctx =
                (PoW_FPGA_Accel_Context *) impl_ctx->context + i;
            memcpy(ctx->input_trytes, trytes, (transactionTrinarySize) / 3);
            ctx->mwm = mwm;
            ctx->indexOfContext = i;
            return ctx;
        }
    }

    pthread_mutex_unlock(&impl_ctx->lock);
    return NULL; /* It should not happen */
}

/*
 * Hand a PoW context back to the pool: under impl_ctx->lock, set the bitmap
 * bit selected by the context's indexOfContext again. Always returns true.
 */
static bool PoWFPGAAccel_freePoWContext(ImplContext *impl_ctx, void *pow_ctx)
{
    PoW_FPGA_Accel_Context *released = (PoW_FPGA_Accel_Context *) pow_ctx;

    pthread_mutex_lock(&impl_ctx->lock);
    impl_ctx->bitmap |= 0x1 << released->indexOfContext;
    pthread_mutex_unlock(&impl_ctx->lock);

    return true;
}

static int8_t *PoWFPGAAccel_getPoWResult(void *pow_ctx)
{
int8_t *ret =
(int8_t *) malloc(sizeof(int8_t) * ((transactionTrinarySize) / 3));
if (!ret)
return NULL;
memcpy(ret, ((PoW_FPGA_Accel_Context *) pow_ctx)->output_trytes,
(transactionTrinarySize) / 3);
return ret;
}

/*
 * Implementation descriptor of the FPGA accelerator back end. It starts
 * with no context array and an empty bitmap, and wires the callbacks
 * defined in this file into the ImplContext interface.
 */
ImplContext PoWFPGAAccel_Context = {
    /* Pool state */
    .context = NULL,
    .bitmap = 0,
    .num_working_thread = 0,
    .num_max_thread = 1, /* num_max_thread >= 1 */

    /* Back-end lifecycle */
    .initialize = PoWFPGAAccel_Context_Initialize,
    .destroy = PoWFPGAAccel_Context_Destroy,

    /* Per-request operations */
    .getPoWContext = PoWFPGAAccel_getPoWContext,
    .doThePoW = PoWFPGAAccel,
    .getPoWResult = PoWFPGAAccel_getPoWResult,
    .freePoWContext = PoWFPGAAccel_freePoWContext,
};
Loading