From 90e6dc329ce6181b5251166d967e547faa0b53d7 Mon Sep 17 00:00:00 2001 From: marktwtn Date: Wed, 23 Jan 2019 03:41:49 +0800 Subject: [PATCH] Integrate libtuv thread pool into dcurl To reduce the overhead of creating and eliminating the threads repeatedly, we integrate the thread pool of libtuv with git submodule. The pthread-related functions and data types are replaced with the corresponding ones of libtuv. The compilation of libtuv library is written in the file mk/submodule.mk. The README.md asks the user to initialize and update the git submodule right after downloading the repository. Close #58. --- .gitmodules | 3 +++ Makefile | 11 +++++++---- README.md | 3 ++- deps/libtuv | 1 + mk/submodule.mk | 8 ++++++++ src/pow_avx.c | 34 ++++++++++++++++++++++++---------- src/pow_avx.h | 5 ++++- src/pow_c.c | 34 ++++++++++++++++++++++++---------- src/pow_c.h | 5 ++++- src/pow_sse.c | 34 ++++++++++++++++++++++++---------- src/pow_sse.h | 5 ++++- 11 files changed, 105 insertions(+), 38 deletions(-) create mode 100644 .gitmodules create mode 160000 deps/libtuv create mode 100644 mk/submodule.mk diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..db8e5f6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "deps/libtuv"] + path = deps/libtuv + url = https://github.com/DLTcollab/libtuv.git diff --git a/Makefile b/Makefile index 920a0c2..fd383cd 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,9 @@ endif # Check specific CPU features available on build host include mk/cpu-features.mk +# Handle git submodule +include mk/submodule.mk + ifeq ("$(BUILD_AVX)","1") CFLAGS += -mavx -DENABLE_AVX ifeq ("$(call cpu_feature,AVX2)","1") @@ -117,17 +120,17 @@ OBJS := $(addprefix $(OUT)/, $(OBJS)) $(OUT)/test-%.o: tests/test-%.c $(VECHO) " CC\t$@\n" - $(Q)$(CC) -o $@ $(CFLAGS) -I $(SRC) -c -MMD -MF $@.d $< + $(Q)$(CC) -o $@ $(CFLAGS) -I $(SRC) -I $(LIBTUV_INCLUDE_PATH) -c -MMD -MF $@.d $< $(OUT)/%.o: $(SRC)/%.c $(VECHO) " CC\t$@\n" - $(Q)$(CC) -o $@ 
$(CFLAGS) -c -MMD -MF $@.d $< + $(Q)$(CC) -o $@ $(CFLAGS) -I $(LIBTUV_INCLUDE_PATH) -c -MMD -MF $@.d $< -$(OUT)/test-%: $(OUT)/test-%.o $(OBJS) +$(OUT)/test-%: $(OUT)/test-%.o $(OBJS) $(LIBTUV_LIBRARY) $(VECHO) " LD\t$@\n" $(Q)$(CC) -o $@ $^ $(LDFLAGS) -$(OUT)/libdcurl.so: $(OBJS) +$(OUT)/libdcurl.so: $(OBJS) $(LIBTUV_LIBRARY) $(VECHO) " LD\t$@\n" $(Q)$(CC) -shared -o $@ $^ $(LDFLAGS) diff --git a/README.md b/README.md index f72f07f..51513cc 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,10 @@ enabled in multi-threaded execuction fashion, resulting in much faster proof-of- Reference Implementation (IRI). Additionally, dcurl also supports the FPGA-accelerated solution further described in docs/FPGA-ACCEL.md # Warning -* You need to configure paths and flags of OpenCL installation in ```mk/opencl.mk``` +* You need to configure paths and flags of OpenCL installation in ```mk/opencl.mk```. * dcurl will automatically configure all the GPU divices on your platform. * Check JDK installation and set JAVA_HOME if you wish to specify. +* Initialize and update git submodules with `$ git submodule init` and `$ git submodule update` after downloading the `dcurl` repository. * If your platform doesn't support Intel SSE, dcurl would be compiled with naive implementation. * For the IOTA hardware accelerator, we integrate [Lampa Lab's Cyclone V FPGA PoW](https://github.com/LampaLab/iota_fpga) into dcurl. Lampa Lab provides soc_system.rbf only for DE10-nano board. You need to synthesize to get soc_system.rbf for using Arrow SoCKit board and [this RBF file](https://github.com/ajblane/dcurl/releases/tag/v1.0-SoCKit) can be downloaded from our release. Moreover, you need to download [Lampa Lab-provided Linux image](https://github.com/LampaLab/iota_fpga/releases/tag/v0.1) to flash into the micro-SD card and root password is 123456. Finally, you also need to download dcurl into root directory. 
diff --git a/deps/libtuv b/deps/libtuv new file mode 160000 index 0000000..3177b57 --- /dev/null +++ b/deps/libtuv @@ -0,0 +1 @@ +Subproject commit 3177b57937056ab5255adc4ad338d5e4ee844eab diff --git a/mk/submodule.mk b/mk/submodule.mk new file mode 100644 index 0000000..628ed39 --- /dev/null +++ b/mk/submodule.mk @@ -0,0 +1,8 @@ +# libtuv related variables +LIBTUV_PATH = deps/libtuv +LIBTUV_INCLUDE_PATH = $(LIBTUV_PATH)/include +# PIC (Position-Independent-Code) library +LIBTUV_LIBRARY = $(LIBTUV_PATH)/build/x86_64-linux/release/lib/libtuv.o + +$(LIBTUV_LIBRARY): + $(MAKE) -C $(LIBTUV_PATH) TUV_BUILD_TYPE=release TUV_CREATE_PIC_LIB=yes diff --git a/src/pow_avx.c b/src/pow_avx.c index 89699b6..8c4c0f4 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu-utils.h" #include "curl.h" #include "implcontext.h" @@ -415,7 +416,7 @@ long long int pwork256(int8_t mid[], int mwm, int8_t nonce[], int n, } #endif -static void *pworkThread(void *pitem) +static void pworkThread(void *pitem) { Pwork_struct *pworkInfo = (Pwork_struct *) pitem; pworkInfo->ret = pwork256(pworkInfo->mid, pworkInfo->mwm, @@ -429,7 +430,10 @@ static void *pworkThread(void *pitem) pworkInfo->n = -1; } pthread_mutex_unlock(pworkInfo->lock); - pthread_exit(NULL); +} + +static void work_cb(uv_work_t *req) { + pworkThread(req->data); } static int8_t *tx_to_cstate(Trytes_t *tx) @@ -494,7 +498,8 @@ bool PowAVX(void *pow_ctx) ctx->pow_info.time = 0; ctx->pow_info.hash_count = 0; pthread_mutex_init(&ctx->lock, NULL); - pthread_t *threads = ctx->threads; + uv_loop_t *loop_ptr = &ctx->loop; + uv_work_t *work_req = ctx->work_req; Pwork_struct *pitem = ctx->pitem; int8_t **nonce_array = ctx->nonce_array; @@ -518,12 +523,14 @@ bool PowAVX(void *pow_ctx) pitem[i].lock = &ctx->lock; pitem[i].stopPoW = &ctx->stopPoW; pitem[i].ret = 0; - pthread_create(&threads[i], NULL, pworkThread, (void *) &pitem[i]); + work_req[i].data = &pitem[i]; + 
uv_queue_work(loop_ptr, &work_req[i], work_cb, NULL); } + uv_run(loop_ptr, UV_RUN_DEFAULT); + int completedIndex = -1; for (int i = 0; i < ctx->num_threads; i++) { - pthread_join(threads[i], NULL); if (pitem[i].n == -1) completedIndex = i; ctx->pow_info.hash_count += (uint64_t) (pitem[i].ret >= 0 ? pitem[i].ret : -pitem[i].ret + 1); @@ -564,14 +571,14 @@ static bool PoWAVX_Context_Initialize(ImplContext *impl_ctx) if (!ctx) return false; /* Pre-allocate Memory Chunk for each field */ - void *threads_chunk = malloc(impl_ctx->num_max_thread * sizeof(pthread_t) * nproc); + void *work_req_chunk = malloc(impl_ctx->num_max_thread * sizeof(uv_work_t) * nproc); void *pitem_chunk = malloc(impl_ctx->num_max_thread * sizeof(Pwork_struct) * nproc); void *nonce_ptr_chunk = malloc(impl_ctx->num_max_thread * sizeof(int8_t *) * nproc); void *nonce_chunk = malloc(impl_ctx->num_max_thread * NONCE_TRITS_LENGTH * nproc); - if (!threads_chunk || !pitem_chunk || !nonce_ptr_chunk || !nonce_chunk) goto fail; + if (!work_req_chunk || !pitem_chunk || !nonce_ptr_chunk || !nonce_chunk) goto fail; for (int i = 0; i < impl_ctx->num_max_thread; i++) { - ctx[i].threads = (pthread_t *) (threads_chunk + i * sizeof(pthread_t) * nproc); + ctx[i].work_req = (uv_work_t *) (work_req_chunk + i * sizeof(uv_work_t) * nproc); ctx[i].pitem = (Pwork_struct *) (pitem_chunk + i * sizeof(Pwork_struct) * nproc); ctx[i].nonce_array = (int8_t **) (nonce_ptr_chunk + i * sizeof(int8_t *) * nproc); for (int j = 0; j < nproc; j++) @@ -579,14 +586,18 @@ static bool PoWAVX_Context_Initialize(ImplContext *impl_ctx) j * NONCE_TRITS_LENGTH); ctx[i].num_max_threads = nproc; impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; + uv_loop_init(&ctx[i].loop); } impl_ctx->context = ctx; pthread_mutex_init(&impl_ctx->lock, NULL); return true; fail: + for (int i = 0; i < impl_ctx->num_max_thread; i++) { + uv_loop_close(&ctx[i].loop); + } free(ctx); - free(threads_chunk); + free(work_req_chunk); free(pitem_chunk); free(nonce_ptr_chunk); 
free(nonce_chunk); @@ -596,7 +607,10 @@ static bool PoWAVX_Context_Initialize(ImplContext *impl_ctx) static void PoWAVX_Context_Destroy(ImplContext *impl_ctx) { PoW_AVX_Context *ctx = (PoW_AVX_Context *) impl_ctx->context; - free(ctx[0].threads); + for (int i = 0; i < impl_ctx->num_max_thread; i++) { + uv_loop_close(&ctx[i].loop); + } + free(ctx[0].work_req); free(ctx[0].pitem); free(ctx[0].nonce_array[0]); free(ctx[0].nonce_array); diff --git a/src/pow_avx.h b/src/pow_avx.h index 3fb6cf8..23ed1b2 100644 --- a/src/pow_avx.h +++ b/src/pow_avx.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "common.h" #include "constants.h" @@ -25,7 +26,9 @@ typedef struct _pow_avx_context PoW_AVX_Context; struct _pow_avx_context { /* Resource of computing */ pthread_mutex_t lock; - pthread_t *threads; + /* Data type of libtuv */ + uv_loop_t loop; + uv_work_t *work_req; Pwork_struct *pitem; int8_t **nonce_array; int stopPoW; diff --git a/src/pow_c.c b/src/pow_c.c index 307daa0..f6479b8 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu-utils.h" #include "curl.h" #include "implcontext.h" @@ -176,7 +177,7 @@ static int64_t pwork(int8_t mid[], int mwm, int8_t nonce[], int n, return loop_cpu(lmid, hmid, mwm, nonce, stopPoW); } -static void *pworkThread(void *pitem) +static void pworkThread(void *pitem) { Pwork_struct *pworkInfo = (Pwork_struct *) pitem; pworkInfo->ret = pwork(pworkInfo->mid, pworkInfo->mwm, @@ -190,7 +191,10 @@ static void *pworkThread(void *pitem) pworkInfo->n = -1; } pthread_mutex_unlock(pworkInfo->lock); - pthread_exit(NULL); +} + +static void work_cb(uv_work_t *req) { + pworkThread(req->data); } static int8_t *tx_to_cstate(Trytes_t *tx) @@ -255,7 +259,8 @@ bool PowC(void *pow_ctx) ctx->pow_info.time = 0; ctx->pow_info.hash_count = 0; pthread_mutex_init(&ctx->lock, NULL); - pthread_t *threads = ctx->threads; + uv_loop_t *loop_ptr = &ctx->loop; + uv_work_t *work_req = ctx->work_req; 
Pwork_struct *pitem = ctx->pitem; int8_t **nonce_array = ctx->nonce_array; @@ -279,12 +284,14 @@ bool PowC(void *pow_ctx) pitem[i].lock = &ctx->lock; pitem[i].stopPoW = &ctx->stopPoW; pitem[i].ret = 0; - pthread_create(&threads[i], NULL, pworkThread, (void *) &pitem[i]); + work_req[i].data = &pitem[i]; + uv_queue_work(loop_ptr, &work_req[i], work_cb, NULL); } + uv_run(loop_ptr, UV_RUN_DEFAULT); + int completedIndex = -1; for (int i = 0; i < ctx->num_threads; i++) { - pthread_join(threads[i], NULL); if (pitem[i].n == -1) completedIndex = i; ctx->pow_info.hash_count += (uint64_t) (pitem[i].ret >= 0 ? pitem[i].ret : -pitem[i].ret + 1); @@ -324,14 +331,14 @@ static bool PoWC_Context_Initialize(ImplContext *impl_ctx) if (!ctx) return false; /* Pre-allocate Memory Chunk for each field */ - void *threads_chunk = malloc(impl_ctx->num_max_thread * sizeof(pthread_t) * nproc); + void *work_req_chunk = malloc(impl_ctx->num_max_thread * sizeof(uv_work_t) * nproc); void *pitem_chunk = malloc(impl_ctx->num_max_thread * sizeof(Pwork_struct) * nproc); void *nonce_ptr_chunk = malloc(impl_ctx->num_max_thread * sizeof(int8_t *) * nproc); void *nonce_chunk = malloc(impl_ctx->num_max_thread * NONCE_TRITS_LENGTH * nproc); - if (!threads_chunk || !pitem_chunk || !nonce_ptr_chunk || !nonce_chunk) goto fail; + if (!work_req_chunk || !pitem_chunk || !nonce_ptr_chunk || !nonce_chunk) goto fail; for (int i = 0; i < impl_ctx->num_max_thread; i++) { - ctx[i].threads = (pthread_t *) (threads_chunk + i * sizeof(pthread_t) * nproc); + ctx[i].work_req = (uv_work_t *) (work_req_chunk + i * sizeof(uv_work_t) * nproc); ctx[i].pitem = (Pwork_struct *) (pitem_chunk + i * sizeof(Pwork_struct) * nproc); ctx[i].nonce_array = (int8_t **) (nonce_ptr_chunk + i * sizeof(int8_t *) * nproc); for (int j = 0; j < nproc; j++) @@ -339,14 +346,18 @@ static bool PoWC_Context_Initialize(ImplContext *impl_ctx) j * NONCE_TRITS_LENGTH); ctx[i].num_max_threads = nproc; impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; + 
uv_loop_init(&ctx[i].loop); } impl_ctx->context = ctx; pthread_mutex_init(&impl_ctx->lock, NULL); return true; fail: + for (int i = 0; i < impl_ctx->num_max_thread; i++) { + uv_loop_close(&ctx[i].loop); + } free(ctx); - free(threads_chunk); + free(work_req_chunk); free(pitem_chunk); free(nonce_ptr_chunk); free(nonce_chunk); @@ -356,7 +367,10 @@ static bool PoWC_Context_Initialize(ImplContext *impl_ctx) static void PoWC_Context_Destroy(ImplContext *impl_ctx) { PoW_C_Context *ctx = (PoW_C_Context *) impl_ctx->context; - free(ctx[0].threads); + for (int i = 0; i < impl_ctx->num_max_thread; i++) { + uv_loop_close(&ctx[i].loop); + } + free(ctx[0].work_req); free(ctx[0].pitem); free(ctx[0].nonce_array[0]); free(ctx[0].nonce_array); diff --git a/src/pow_c.h b/src/pow_c.h index 49be309..5da041f 100644 --- a/src/pow_c.h +++ b/src/pow_c.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "common.h" #include "constants.h" @@ -26,7 +27,9 @@ typedef struct _pow_c_context PoW_C_Context; struct _pow_c_context { /* Resource of computing */ pthread_mutex_t lock; - pthread_t *threads; + /* Data type of libtuv */ + uv_loop_t loop; + uv_work_t *work_req; Pwork_struct *pitem; int8_t **nonce_array; int stopPoW; diff --git a/src/pow_sse.c b/src/pow_sse.c index 36b6dff..fdb4669 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "cpu-utils.h" #include "curl.h" #include "implcontext.h" @@ -193,7 +194,7 @@ static int64_t pwork128(int8_t mid[], int mwm, int8_t nonce[], int n, return loop128(lmid, hmid, mwm, nonce, stopPoW); } -static void *pworkThread(void *pitem) +static void pworkThread(void *pitem) { Pwork_struct *pworkInfo = (Pwork_struct *) pitem; pworkInfo->ret = pwork128(pworkInfo->mid, pworkInfo->mwm, @@ -207,7 +208,10 @@ static void *pworkThread(void *pitem) pworkInfo->n = -1; } pthread_mutex_unlock(pworkInfo->lock); - pthread_exit(NULL); +} + +static void work_cb(uv_work_t *req) { + pworkThread(req->data); } 
static int8_t *tx_to_cstate(Trytes_t *tx) @@ -272,7 +276,8 @@ bool PowSSE(void *pow_ctx) ctx->pow_info.time = 0; ctx->pow_info.hash_count = 0; pthread_mutex_init(&ctx->lock, NULL); - pthread_t *threads = ctx->threads; + uv_loop_t *loop_ptr = &ctx->loop; + uv_work_t *work_req = ctx->work_req; Pwork_struct *pitem = ctx->pitem; int8_t **nonce_array = ctx->nonce_array; @@ -296,12 +301,14 @@ bool PowSSE(void *pow_ctx) pitem[i].lock = &ctx->lock; pitem[i].stopPoW = &ctx->stopPoW; pitem[i].ret = 0; - pthread_create(&threads[i], NULL, pworkThread, (void *) &pitem[i]); + work_req[i].data = &pitem[i]; + uv_queue_work(loop_ptr, &work_req[i], work_cb, NULL); } + uv_run(loop_ptr, UV_RUN_DEFAULT); + int completedIndex = -1; for (int i = 0; i < ctx->num_threads; i++) { - pthread_join(threads[i], NULL); if (pitem[i].n == -1) completedIndex = i; ctx->pow_info.hash_count += (uint64_t) (pitem[i].ret >= 0 ? pitem[i].ret : -pitem[i].ret + 1); @@ -341,14 +348,14 @@ static bool PoWSSE_Context_Initialize(ImplContext *impl_ctx) if (!ctx) return false; /* Pre-allocate Memory Chunk for each field */ - void *threads_chunk = malloc(impl_ctx->num_max_thread * sizeof(pthread_t) * nproc); + void *work_req_chunk = malloc(impl_ctx->num_max_thread * sizeof(uv_work_t) * nproc); void *pitem_chunk = malloc(impl_ctx->num_max_thread * sizeof(Pwork_struct) * nproc); void *nonce_ptr_chunk = malloc(impl_ctx->num_max_thread * sizeof(int8_t *) * nproc); void *nonce_chunk = malloc(impl_ctx->num_max_thread * NONCE_TRITS_LENGTH * nproc); - if (!threads_chunk || !pitem_chunk || !nonce_ptr_chunk || !nonce_chunk) goto fail; + if (!work_req_chunk || !pitem_chunk || !nonce_ptr_chunk || !nonce_chunk) goto fail; for (int i = 0; i < impl_ctx->num_max_thread; i++) { - ctx[i].threads = (pthread_t *) (threads_chunk + i * sizeof(pthread_t) * nproc); + ctx[i].work_req = (uv_work_t *) (work_req_chunk + i * sizeof(uv_work_t) * nproc); ctx[i].pitem = (Pwork_struct *) (pitem_chunk + i * sizeof(Pwork_struct) * nproc); 
ctx[i].nonce_array = (int8_t **) (nonce_ptr_chunk + i * sizeof(int8_t *) * nproc); for (int j = 0; j < nproc; j++) @@ -356,14 +363,18 @@ static bool PoWSSE_Context_Initialize(ImplContext *impl_ctx) j * NONCE_TRITS_LENGTH); ctx[i].num_max_threads = nproc; impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; + uv_loop_init(&ctx[i].loop); } impl_ctx->context = ctx; pthread_mutex_init(&impl_ctx->lock, NULL); return true; fail: + for (int i = 0; i < impl_ctx->num_max_thread; i++) { + uv_loop_close(&ctx[i].loop); + } free(ctx); - free(threads_chunk); + free(work_req_chunk); free(pitem_chunk); free(nonce_ptr_chunk); free(nonce_chunk); @@ -373,7 +384,10 @@ static bool PoWSSE_Context_Initialize(ImplContext *impl_ctx) static void PoWSSE_Context_Destroy(ImplContext *impl_ctx) { PoW_SSE_Context *ctx = (PoW_SSE_Context *) impl_ctx->context; - free(ctx[0].threads); + for (int i = 0; i < impl_ctx->num_max_thread; i++) { + uv_loop_close(&ctx[i].loop); + } + free(ctx[0].work_req); free(ctx[0].pitem); free(ctx[0].nonce_array[0]); free(ctx[0].nonce_array); diff --git a/src/pow_sse.h b/src/pow_sse.h index 4d0d6d1..8e8328d 100644 --- a/src/pow_sse.h +++ b/src/pow_sse.h @@ -7,6 +7,7 @@ #include #include #include +#include typedef struct _pwork_struct Pwork_struct; @@ -25,7 +26,9 @@ typedef struct _pow_sse_context PoW_SSE_Context; struct _pow_sse_context { /* Resource of computing */ pthread_mutex_t lock; - pthread_t *threads; + /* Data type of libtuv */ + uv_loop_t loop; + uv_work_t *work_req; Pwork_struct *pitem; int8_t **nonce_array; int stopPoW;