From bd81dbee532ccb6fb234a85efb79e10c7d3e6b66 Mon Sep 17 00:00:00 2001 From: marktwtn Date: Tue, 5 Mar 2019 11:40:26 +0800 Subject: [PATCH 01/10] Test: Remove Python related code and build script The Python testing code has some problems with the GCC Sanitizers. Since dcurl has already had C testing code for validating dcurl implementation and the IOTA Python library is quite outdated, the Python testing code can be removed safely. --- Makefile | 11 ----- mk/python.mk | 14 ------ tests/test-ccurl-multi_pow.py | 79 ---------------------------------- tests/test-multi-pow.py | 81 ----------------------------------- 4 files changed, 185 deletions(-) delete mode 100644 mk/python.mk delete mode 100644 tests/test-ccurl-multi_pow.py delete mode 100644 tests/test-multi-pow.py diff --git a/Makefile b/Makefile index 41b0186..e5cf280 100644 --- a/Makefile +++ b/Makefile @@ -78,13 +78,8 @@ TESTS = \ trinary \ curl \ dcurl \ - multi-pow \ pow -ifeq ("$(BUILD_COMPAT)", "1") -TESTS += ccurl-multi_pow -endif - TESTS := $(addprefix $(OUT)/test-, $(TESTS)) LIBS = libdcurl.so @@ -156,12 +151,6 @@ $(OUT)/libdcurl.so: $(OBJS) $(VECHO) " LD\t$@\n" $(Q)$(CC) -shared -o $@ $^ $(LDFLAGS) -$(OUT)/test-%: tests/test-%.py $(OUT)/libdcurl.so - $(Q)echo "#!$(PYTHON)" > $@ - $(call py_prepare_cmd) - $(Q)chmod +x $@ - include mk/common.mk -include mk/python.mk -include $(deps) diff --git a/mk/python.mk b/mk/python.mk deleted file mode 100644 index b88a92e..0000000 --- a/mk/python.mk +++ /dev/null @@ -1,14 +0,0 @@ -PYTHON = python3 -PYTHON := $(shell which $(PYTHON)) -ifndef PYTHON -$(error "python3 is required.") -endif - -# check "iota" module in Python installation -PY_CHECK_MOD_IOTA := $(shell $(PYTHON) -c "import iota" 2>/dev/null && \ - echo 1 || echo 0) -ifeq ("$(PY_CHECK_MOD_IOTA)","1") - py_prepare_cmd = $(Q)cat $< >> $@ -else - py_prepare_cmd = $(warning "skip $@ because PyIOTA is not installed.") -endif diff --git a/tests/test-ccurl-multi_pow.py b/tests/test-ccurl-multi_pow.py deleted file mode 100644 index 6c8c9ad..0000000 --- a/tests/test-ccurl-multi_pow.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- - -import ctypes -import sys -import argparse -import _thread -import iota -from iota import TryteString -from iota.crypto import Curl - -TRYTES_LIST_PATH = "./tests/trytes.txt" -DCURL_PATH = "./build/libdcurl.so" -NUM_TRYTES = 10 -RESULT_TX = [] - -join_list = [] -for i in range(NUM_TRYTES): - join_list.append(_thread.allocate_lock()) - -# return list of trytes # -def read_trytes(FILE_PATH): - f = open(FILE_PATH, 'r') - row = f.readlines() - tmp = [] - for r in row: - tmp.append(r.split('\n')[0]) - return tmp - -def hash(trytes): - curl = Curl() - curl.absorb(trytes.as_trits()) - trits_out = [] - curl.squeeze(trits_out) - return TryteString.from_trits(trits_out) - -def validate(trytes, mwm): - trits = trytes.as_trits() - for i in range(len(trits) - 1, len(trits) - mwm - 1, -1): - if trits[i] != 0: - return False - return True - -def call_dcurl(idx, mwm, lib, trytes_list): - tmp = str(trytes_list[idx]).encode('ascii') - ret = lib.ccurl_pow(tmp, mwm) - trytes = TryteString(ret[:2673]) - - hash_trytes = hash(trytes) - RESULT_TX.append(hash_trytes) - - join_list[idx].release() - -def testing(): - trytes_list = read_trytes(TRYTES_LIST_PATH) - - # Settings of shared library - libdcurl = ctypes.cdll.LoadLibrary(DCURL_PATH) - libdcurl.ccurl_pow.argtypes = [ctypes.c_char_p, ctypes.c_int] - libdcurl.ccurl_pow.restype = ctypes.c_char_p - - #libdcurl.dcurl_init(num_cpu, num_gpu) - - for i in range(NUM_TRYTES): - join_list[i].acquire() - _thread.start_new_thread(call_dcurl, (i, 14, libdcurl, trytes_list, )) - - # threadpool.join() - for i in range(NUM_TRYTES): - while join_list[i].locked(): pass - - libdcurl.ccurl_pow_finalize(); - - for tx in RESULT_TX: - if validate(tx, 14) is not True: - sys.exit(1) - -if __name__ == "__main__": - testing() - sys.exit(0) diff --git a/tests/test-multi-pow.py b/tests/test-multi-pow.py deleted file mode 100644 index 82c6577..0000000 --- a/tests/test-multi-pow.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- - -import ctypes -import sys -import argparse -import _thread -import iota -from iota import TryteString -from iota.crypto import Curl - -TRYTES_LIST_PATH = "./tests/trytes.txt" -DCURL_PATH = "./build/libdcurl.so" -NUM_TRYTES = 10 -RESULT_TX = [] - -join_list = [] -for i in range(NUM_TRYTES): - join_list.append(_thread.allocate_lock()) - -# return list of trytes # -def read_trytes(FILE_PATH): - f = open(FILE_PATH, 'r') - row = f.readlines() - tmp = [] - for r in row: - tmp.append(r.split('\n')[0]) - return tmp - -def hash(trytes): - curl = Curl() - curl.absorb(trytes.as_trits()) - trits_out = [] - curl.squeeze(trits_out) - return TryteString.from_trits(trits_out) - -def validate(trytes, mwm): - trits = trytes.as_trits() - for i in range(len(trits) - 1, len(trits) - mwm - 1, -1): - if trits[i] != 0: - return False - return True - -def call_dcurl(idx, mwm, lib, trytes_list): - tmp = str(trytes_list[idx]).encode('ascii') - ret = lib.dcurl_entry(tmp, mwm, 0) - trytes = TryteString(ret[:2673]) - - hash_trytes = hash(trytes) - RESULT_TX.append(hash_trytes) - - join_list[idx].release() - -def testing(): - trytes_list = read_trytes(TRYTES_LIST_PATH) - - # Settings of shared library - libdcurl = ctypes.cdll.LoadLibrary(DCURL_PATH) - #libdcurl.dcurl_init.argtypes = [ctypes.c_int, ctypes.c_int] - libdcurl.dcurl_entry.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_int] - libdcurl.dcurl_entry.restype = ctypes.c_char_p - - libdcurl.dcurl_init() - - for i in range(NUM_TRYTES): - join_list[i].acquire() - _thread.start_new_thread(call_dcurl, (i, 14, libdcurl, trytes_list, )) - - # threadpool.join() - for i in range(NUM_TRYTES): - while join_list[i].locked(): pass - - libdcurl.dcurl_destroy() - - for tx in RESULT_TX: - if validate(tx, 14) is not True: - sys.exit(1) - -if __name__ == "__main__": - testing() - - sys.exit(0) From 1de94ce9db89ea326bb7f472f64d6f1d15b27103 Mon Sep 17 00:00:00 2001 From: marktwtn Date: Wed, 6 Mar 2019 14:23:07 +0800 Subject: [PATCH 02/10] Docs: Revise the branch name from dev to develop The default branch has been renamed from dev to develop. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9d7ebac..994e094 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # dcurl - Multi-threaded Curl implementation -[![Build Status](https://travis-ci.org/DLTcollab/dcurl.svg?branch=dev)](https://travis-ci.org/DLTcollab/dcurl) +[![Build Status](https://travis-ci.org/DLTcollab/dcurl.svg?branch=develop)](https://travis-ci.org/DLTcollab/dcurl) ![Supported IRI version](https://img.shields.io/badge/Supported%20IRI%20Version-1.6.0-brightgreen.svg) ![Release version](https://img.shields.io/github/release-pre/DLTcollab/dcurl.svg) @@ -23,7 +23,7 @@ After integrating dcurl into IRI, performance of [attachToTangle](https://iota.r * Each sampling is measured with 30 transaction trytes and total 200 samples are measured. * mwm = 14, 26 CPU threads to find nonce * Settings: enable 2 pow tasks in CPU, 1 pow tasks in GPU at the same time -![](https://github.com/raw/DLTcollab/dcurl/dev/docs/benchmark.png) +![](https://github.com/raw/DLTcollab/dcurl/develop/docs/benchmark.png) ## IRI Adaptation From 0fcc94c69224b204f9d821d0b8f5e8b8a1be794b Mon Sep 17 00:00:00 2001 From: Yu Wei Wu Date: Thu, 7 Mar 2019 16:18:29 +0800 Subject: [PATCH 03/10] fix: rearrange bitwise operations while transform While optimized transform function holds same results, operations in each assignments are not the same number. So we reduce and rearrange some operators back to original positions. Improved performance is still valid. Fix #120 --- src/pow_avx.c | 28 ++++++++++++---------------- src/pow_c.c | 12 ++++++------ src/pow_kernel.cl | 6 +++--- src/pow_sse.c | 12 ++++++------ 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/src/pow_avx.c b/src/pow_avx.c index 83aba86..faa5cec 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -31,10 +31,9 @@ static void transform256(__m256i *lmid, __m256i *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_si256(_mm256_and_si256(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_si256( alpha, _mm256_xor_si256(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */ hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } @@ -49,10 +48,9 @@ static void transform256(__m256i *lmid, __m256i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_si256(_mm256_and_si256(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_si256( alpha, _mm256_xor_si256(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */ hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } @@ -223,10 +221,9 @@ static void transform256(__m256d *lmid, __m256d *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_pd(_mm256_and_pd(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_pd(alpha, _mm256_xor_pd(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_pd(delta, one); /* ~delta */ hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } @@ -242,10 +239,9 @@ static void transform256(__m256d *lmid, __m256d *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_pd(_mm256_and_pd(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_pd(alpha, _mm256_xor_pd(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_pd(delta, one); /* ~delta */ hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } diff --git a/src/pow_c.c b/src/pow_c.c index cbf2d81..9f11fd2 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -27,9 +27,9 @@ static void transform64(uint64_t *lmid, uint64_t *hmid) int t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (lfrom[t2] ^ beta); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } uint64_t *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -43,9 +43,9 @@ static void transform64(uint64_t *lmid, uint64_t *hmid) int t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (lfrom[t2] ^ beta); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } } diff --git a/src/pow_kernel.cl b/src/pow_kernel.cl index 10d5aa8..32b7946 100644 --- a/src/pow_kernel.cl +++ b/src/pow_kernel.cl @@ -134,9 +134,9 @@ void transform(__global bc_trit_t* state_low, __global bc_trit_t* state_high, k = j+1; alpha = state_low[INDEX[j]]; beta = state_high[INDEX[j]]; - delta = beta ^ state_low[INDEX[k]]; - sp_low[i] = ~(delta & alpha); - sp_high[i] = delta | (alpha ^ state_high[INDEX[k]]); + delta = alpha & (beta ^ state_low[INDEX[k]]); + sp_low[i] = ~delta; + sp_high[i] = (alpha ^ state_high[INDEX[k]]) | delta; } barrier(CLK_LOCAL_MEM_FENCE); for (i = 0; i < n_trits; i++) { diff --git a/src/pow_sse.c b/src/pow_sse.c index eaea2f1..da0dc1e 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -29,9 +29,9 @@ static void transform128(__m128i *lmid, __m128i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (beta ^ lfrom[t2]); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } __m128i *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -44,9 +44,9 @@ static void transform128(__m128i *lmid, __m128i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (beta ^ lfrom[t2]); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } } From 87610144cd4440c15fba75f0bc10204195e03ecb Mon Sep 17 00:00:00 2001 From: marktwtn Date: Wed, 13 Mar 2019 20:49:13 +0800 Subject: [PATCH 04/10] refactor: Eliminate undefined behavior of shifting Shifting a negative value is an undefined behavior. It is replaced with an equal multiplication operation and the performance remains the same because of the compiler optimization. --- src/curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/curl.c b/src/curl.c index 2ab48a8..e587340 100644 --- a/src/curl.c +++ b/src/curl.c @@ -20,7 +20,7 @@ static void _transform(int8_t state[]) for (i = 0; i < STATE_TRITS_LENGTH; i++) { int aa = indices[i]; int bb = indices[i + 1]; - to[i] = truthTable[from[aa] + (from[bb] << 2) + 5]; + to[i] = truthTable[from[aa] + (from[bb] * 4) + 5]; } int8_t *tmp = from; from = to; From 509bb634303d1492cfbd63adf8889fe082f4655f Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Mon, 18 Mar 2019 22:38:49 +0800 Subject: [PATCH 05/10] Revise copyright info --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 5a14b0b..b9ed260 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2018 dcurl Developers. +Copyright (C) 2018-2019 BiiLabs Co., Ltd. and Contributors. Copyright (C) 2017 IOTA AS, IOTA Foundation and Developers. Copyright (C) 2016 Shinya Yagyu. From 5c32660527dbab20d575b9b6d09892196f00ce9f Mon Sep 17 00:00:00 2001 From: marktwtn Date: Thu, 14 Feb 2019 16:37:05 +0800 Subject: [PATCH 06/10] perf: Optimize trytes validation with x86 SIMD Without SIMD optimization: Input size(byte) - Average time(nsec) 27 - 100.4 81 - 180.2 2592 - 2901.4 2673 - 2181.1 With SIMD optimization: Input size(byte) - Average time(nsec) 27 - 61.7 81 - 37.3 2592 - 177.7 2673 - 203.1 Hardware information: architecture - x86_64 CPU - AMD Ryzen 5 2400G Related #92. --- Makefile | 4 ++++ src/trinary.c | 7 +++++++ src/trinary_sse42.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 src/trinary_sse42.h diff --git a/Makefile b/Makefile index e5cf280..9cddd6a 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,10 @@ else ifeq ("$(BUILD_GENERIC)","1") CFLAGS += -DENABLE_GENERIC endif +ifeq ("$(call cpu_feature,SSE4_2)","1") + CFLAGS += -msse4.2 +endif + ifeq ("$(BUILD_GPU)","1") include mk/opencl.mk endif diff --git a/src/trinary.c b/src/trinary.c index 8c661e9..5903fbb 100644 --- a/src/trinary.c +++ b/src/trinary.c @@ -5,6 +5,9 @@ */ #include "trinary.h" +#if defined(__SSE4_2__) +#include "trinary_sse42.h" +#endif #include #include "constants.h" #include "curl.h" @@ -41,11 +44,15 @@ static bool validateTrytes(Trobject_t *trytes) if (trytes->type != TYPE_TRYTES) return false; +#if defined(__SSE4_2__) + return validateTrytes_sse42(trytes); +#else for (int i = 0; i < trytes->len; i++) if ((trytes->data[i] < 'A' || trytes->data[i] > 'Z') && trytes->data[i] != '9') return false; return true; +#endif } Trobject_t *initTrits(int8_t *src, int len) diff --git a/src/trinary_sse42.h b/src/trinary_sse42.h new file mode 100644 index 0000000..3e9fff1 --- /dev/null +++ b/src/trinary_sse42.h @@ -0,0 +1,43 @@ +#ifndef TRINARY_SSE42_H_ +#define TRINARY_SSE42_H_ + +#include + +#define BLOCK_8BIT(type) (sizeof(type) / sizeof(int8_t)) + +static inline bool validateTrytes_sse42(Trobject_t *trytes) +{ + const int block_8bit = BLOCK_8BIT(__m128i); + /* Characters from 'A' to 'Z' and '9' to '9' */ + const char *range = "AZ99"; + __m128i pattern = _mm_loadu_si128((__m128i *) (range)); + /* The for loop handles the group of the 128-bit characters without the + * end-of-string */ + for (int i = 0; i < (trytes->len) / block_8bit; i++) { + __m128i src = _mm_loadu_si128((__m128i *) (trytes->data) + i); + /* Check whether the characters are in the defined range or not + * Return 0 if all the characters are in the range, otherwise return 1 + */ + int notValid = _mm_cmpistrc(pattern, src, + /* Signed byte comparison */ + _SIDD_SBYTE_OPS | + /* Compare with the character range */ + _SIDD_CMP_RANGES | + /* Negate the comparison result */ + _SIDD_MASKED_NEGATIVE_POLARITY); + + if (notValid) + return false; + } + /* The for loop handles the rest of the characters until the end-of-string + */ + for (int i = ((trytes->len) / block_8bit) * block_8bit; i < trytes->len; + i++) { + if ((trytes->data[i] < 'A' || trytes->data[i] > 'Z') && + trytes->data[i] != '9') + return false; + } + return true; +} + +#endif From 2128a6c9178fce1a1ec8eb454ed787bde0b0e96f Mon Sep 17 00:00:00 2001 From: marktwtn Date: Thu, 14 Feb 2019 16:38:04 +0800 Subject: [PATCH 07/10] perf: Optimize trits validation with x86 SIMD Without SIMD optimization: Input size(byte) - Average time(nsec) 81 - 90.2 243 - 146.1 729 - 336.5 7776 - 2812.0 8019 - 2902.0 With SIMD optimization: Input size(byte) - Average time(nsec) 81 - 64.2 243 - 57.2 729 - 128.1 7776 - 326.1 8019 - 337.1 Hardware information: architecture - x86_64 CPU - AMD Ryzen 5 2400G Related #92. --- src/trinary.c | 4 ++++ src/trinary_sse42.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/trinary.c b/src/trinary.c index 5903fbb..9e2b8b2 100644 --- a/src/trinary.c +++ b/src/trinary.c @@ -33,10 +33,14 @@ static bool validateTrits(Trobject_t *trits) if (trits->type != TYPE_TRITS) return false; +#if defined(__SSE4_2__) + return validateTrits_sse42(trits); +#else for (int i = 0; i < trits->len; i++) if (trits->data[i] < -1 || trits->data[i] > 1) return false; return true; +#endif } static bool validateTrytes(Trobject_t *trytes) diff --git a/src/trinary_sse42.h b/src/trinary_sse42.h index 3e9fff1..8d47159 100644 --- a/src/trinary_sse42.h +++ b/src/trinary_sse42.h @@ -5,6 +5,38 @@ #define BLOCK_8BIT(type) (sizeof(type) / sizeof(int8_t)) +static inline bool validateTrits_sse42(Trobject_t *trits) +{ + const int block_8bit = BLOCK_8BIT(__m128i); + const int posOneElement = 0x01010101; + const int negOneElement = 0xFFFFFFFF; + const __m128i posOne = _mm_set_epi32(posOneElement, posOneElement, + posOneElement, posOneElement); + const __m128i negOne = _mm_set_epi32(negOneElement, negOneElement, + negOneElement, negOneElement); + /* The for loop handles the group of the 128-bit characters without the + * end-of-string */ + for (int i = 0; i < (trits->len) / block_8bit; i++) { + __m128i data = _mm_loadu_si128((__m128i *) (trits->data) + i); + __m128i result = _mm_or_si128( + /* > 1 */ + _mm_cmpgt_epi8(data, posOne), + /* < -1 */ + _mm_cmplt_epi8(data, negOne)); + int notValid = !_mm_test_all_zeros(result, result); + if (notValid) + return false; + } + /* The for loop handles the rest of the characters until the end-of-string + */ + for (int i = ((trits->len) / block_8bit) * block_8bit; i < trits->len; + i++) { + if (trits->data[i] < -1 || trits->data[i] > 1) + return false; + } + return true; +} + static inline bool validateTrytes_sse42(Trobject_t *trytes) { const int block_8bit = BLOCK_8BIT(__m128i); From c4547da22e86d04fc666a30a315fd63ef45d1147 Mon Sep 17 00:00:00 2001 From: marktwtn Date: Thu, 21 Mar 2019 13:27:58 +0800 Subject: [PATCH 08/10] fix: Avoid data race with read-write lock Use libtuv read-write lock API to avoid the data race detected by thread Sanitizer. Related #95. --- src/pow_avx.c | 38 ++++++++++++++++++++++++++------------ src/pow_avx.h | 4 ++-- src/pow_c.c | 27 +++++++++++++++++++-------- src/pow_c.h | 4 ++-- src/pow_sse.c | 26 +++++++++++++++++--------- src/pow_sse.h | 4 ++-- 6 files changed, 68 insertions(+), 35 deletions(-) diff --git a/src/pow_avx.c b/src/pow_avx.c index faa5cec..1e789e4 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -159,12 +159,15 @@ static int loop256(__m256i *lmid, __m256i *hmid, int m, int8_t *nonce, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { int i = 0, n = 0; __m256i lcpy[STATE_TRITS_LENGTH * 2], hcpy[STATE_TRITS_LENGTH * 2]; + uv_rwlock_rdlock(lock); for (i = 0; !incr256(lmid, hmid) && !*stopPoW; i++) { + uv_rwlock_rdunlock(lock); for (int j = 0; j < STATE_TRITS_LENGTH; j++) { lcpy[j] = lmid[j]; hcpy[j] = hmid[j]; @@ -175,7 +178,9 @@ static int loop256(__m256i *lmid, seri256(lmid, hmid, n, nonce); return i * 256; } + uv_rwlock_rdlock(lock); } + uv_rwlock_rdunlock(lock); return -i * 256 - 1; } @@ -183,7 +188,8 @@ static int64_t pwork256(int8_t mid[], int mwm, int8_t nonce[], int n, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { __m256i lmid[STATE_TRITS_LENGTH], hmid[STATE_TRITS_LENGTH]; int offset = HASH_TRITS_LENGTH - NONCE_TRITS_LENGTH; @@ -202,7 +208,7 @@ static int64_t pwork256(int8_t mid[], hmid[offset + 5] = _mm256_set_epi64x(HIGH50, HIGH51, HIGH52, HIGH53); incrN256(n, lmid, hmid); - return loop256(lmid, hmid, mwm, nonce, stopPoW); + return loop256(lmid, hmid, mwm, nonce, stopPoW, lock); } #else /* AVX1 */ @@ -360,12 +366,15 @@ static int loop256(__m256d *lmid, __m256d *hmid, int m, int8_t *nonce, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { int i = 0, n = 0, j = 0; __m256d lcpy[STATE_TRITS_LENGTH * 2], hcpy[STATE_TRITS_LENGTH * 2]; + uv_rwlock_rdlock(lock); for (i = 0; !incr256(lmid, hmid) && !*stopPoW; i++) { + uv_rwlock_rdunlock(lock); for (j = 0; j < STATE_TRITS_LENGTH; j++) { lcpy[j] = lmid[j]; hcpy[j] = hmid[j]; @@ -376,7 +385,9 @@ static int loop256(__m256d *lmid, seri256(lmid, hmid, n, nonce); return i * 256; } + uv_rwlock_rdlock(lock); } + uv_rwlock_rdunlock(lock); return -i * 256 - 1; } @@ -384,7 +395,8 @@ static long long int pwork256(int8_t mid[], int mwm, int8_t nonce[], int n, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { __m256d lmid[STATE_TRITS_LENGTH], hmid[STATE_TRITS_LENGTH]; int offset = HASH_TRITS_LENGTH - NONCE_TRITS_LENGTH; @@ -403,7 +415,7 @@ static long long int pwork256(int8_t mid[], hmid[offset + 5] = _mm256_set_pd(HIGH50, HIGH51, HIGH52, HIGH53); incrN256(n, lmid, hmid); - return loop256(lmid, hmid, mwm, nonce, stopPoW); + return loop256(lmid, hmid, mwm, nonce, stopPoW, lock); } #endif /* __AVX2__ */ @@ -411,16 +423,17 @@ static long long int pwork256(int8_t mid[], static void work_cb(uv_work_t *req) { Pwork_struct *pworkInfo = (Pwork_struct *) req->data; - pworkInfo->ret = pwork256(pworkInfo->mid, pworkInfo->mwm, pworkInfo->nonce, - pworkInfo->n, pworkInfo->stopPoW); + pworkInfo->ret = + pwork256(pworkInfo->mid, pworkInfo->mwm, pworkInfo->nonce, pworkInfo->n, + pworkInfo->stopPoW, pworkInfo->lock); - pthread_mutex_lock(pworkInfo->lock); + uv_rwlock_wrlock(pworkInfo->lock); if (pworkInfo->ret >= 0) { *pworkInfo->stopPoW = 1; /* This means this thread got the result */ pworkInfo->n = -1; } - pthread_mutex_unlock(pworkInfo->lock); + uv_rwlock_wrunlock(pworkInfo->lock); } static int8_t *tx_to_cstate(Trytes_t *tx) @@ -490,7 +503,7 @@ static bool PowAVX(void *pow_ctx) ctx->stopPoW = 0; ctx->pow_info.time = 0; ctx->pow_info.hash_count = 0; - pthread_mutex_init(&ctx->lock, NULL); + uv_rwlock_init(&ctx->lock); uv_loop_t *loop_ptr = &ctx->loop; uv_work_t *work_req = ctx->work_req; Pwork_struct *pitem = ctx->pitem; @@ -548,7 +561,8 @@ static bool PowAVX(void *pow_ctx) nonce_to_result(tx_tryte, nonce_tryte, ctx->output_trytes); fail: - /* Free memory */ + /* Free resource */ + uv_rwlock_destroy(&ctx->lock); free(c_state); freeTrobject(tx_tryte); freeTrobject(nonce_trit); diff --git a/src/pow_avx.h b/src/pow_avx.h index 809ecd9..6103e51 100644 --- a/src/pow_avx.h +++ b/src/pow_avx.h @@ -16,7 +16,7 @@ struct _pwork_struct { int mwm; int8_t *nonce; int n; - pthread_mutex_t *lock; + uv_rwlock_t *lock; int *stopPoW; int64_t ret; }; @@ -25,7 +25,7 @@ typedef struct _pow_avx_context PoW_AVX_Context; struct _pow_avx_context { /* Resource of computing */ - pthread_mutex_t lock; + uv_rwlock_t lock; /* Data type of libtuv */ uv_loop_t loop; uv_work_t *work_req; diff --git a/src/pow_c.c b/src/pow_c.c index 9f11fd2..10f6cd9 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -105,13 +105,16 @@ static long long int loop_cpu(uint64_t *lmid, uint64_t *hmid, int m, int8_t *nonce, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { int n = 0; long long int i = 0; uint64_t lcpy[STATE_TRITS_LENGTH * 2], hcpy[STATE_TRITS_LENGTH * 2]; + uv_rwlock_rdlock(lock); for (i = 0; !incr(lmid, hmid) && !*stopPoW; i++) { + uv_rwlock_rdunlock(lock); memcpy(lcpy, lmid, STATE_TRITS_LENGTH * sizeof(uint64_t)); memcpy(hcpy, hmid, STATE_TRITS_LENGTH * sizeof(uint64_t)); transform64(lcpy, hcpy); @@ -120,7 +123,9 @@ static long long int loop_cpu(uint64_t *lmid, seri(lmid, hmid, n, nonce); return i * 64; } + uv_rwlock_rdlock(lock); } + uv_rwlock_rdunlock(lock); return -i * 64 - 1; } @@ -157,7 +162,12 @@ static void incrN(int n, uint64_t *mid_low, uint64_t *mid_high) } } -static int64_t pwork(int8_t mid[], int mwm, int8_t nonce[], int n, int *stopPoW) +static int64_t pwork(int8_t mid[], + int mwm, + int8_t nonce[], + int n, + int *stopPoW, + uv_rwlock_t *lock) { uint64_t lmid[STATE_TRITS_LENGTH] = {0}, hmid[STATE_TRITS_LENGTH] = {0}; para(mid, lmid, hmid); @@ -173,22 +183,22 @@ static int64_t pwork(int8_t mid[], int mwm, int8_t nonce[], int n, int *stopPoW) hmid[offset + 3] = HIGH3; incrN(n, lmid, hmid); - return loop_cpu(lmid, hmid, mwm, nonce, stopPoW); + return loop_cpu(lmid, hmid, mwm, nonce, stopPoW, lock); } static void work_cb(uv_work_t *req) { Pwork_struct *pworkInfo = (Pwork_struct *) req->data; pworkInfo->ret = pwork(pworkInfo->mid, pworkInfo->mwm, pworkInfo->nonce, - pworkInfo->n, pworkInfo->stopPoW); + pworkInfo->n, pworkInfo->stopPoW, pworkInfo->lock); - pthread_mutex_lock(pworkInfo->lock); + uv_rwlock_wrlock(pworkInfo->lock); if (pworkInfo->ret >= 0) { *pworkInfo->stopPoW = 1; /* This means this thread got the result */ pworkInfo->n = -1; } - pthread_mutex_unlock(pworkInfo->lock); + uv_rwlock_wrunlock(pworkInfo->lock); } static int8_t *tx_to_cstate(Trytes_t *tx) @@ -258,7 +268,7 @@ bool PowC(void *pow_ctx) ctx->stopPoW = 0; ctx->pow_info.time = 0; ctx->pow_info.hash_count = 0; - pthread_mutex_init(&ctx->lock, NULL); + uv_rwlock_init(&ctx->lock); uv_loop_t *loop_ptr = &ctx->loop; uv_work_t *work_req = ctx->work_req; Pwork_struct *pitem = ctx->pitem; @@ -316,7 +326,8 @@ bool PowC(void *pow_ctx) nonce_to_result(tx_tryte, nonce_tryte, ctx->output_trytes); fail: - /* Free memory */ + /* Free resource */ + uv_rwlock_destroy(&ctx->lock); free(c_state); freeTrobject(tx_tryte); freeTrobject(nonce_trit); diff --git a/src/pow_c.h b/src/pow_c.h index 3a6af1a..842c290 100644 --- a/src/pow_c.h +++ b/src/pow_c.h @@ -16,7 +16,7 @@ struct _pwork_struct { int mwm; int8_t *nonce; int n; - pthread_mutex_t *lock; + uv_rwlock_t *lock; int *stopPoW; int index; int64_t ret; @@ -26,7 +26,7 @@ typedef struct _pow_c_context PoW_C_Context; struct _pow_c_context { /* Resource of computing */ - pthread_mutex_t lock; + uv_rwlock_t lock; /* Data type of libtuv */ uv_loop_t loop; uv_work_t *work_req; diff --git a/src/pow_sse.c b/src/pow_sse.c index da0dc1e..e0131c6 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -115,13 +115,16 @@ static int64_t loop128(__m128i *lmid, __m128i *hmid, int m, int8_t *nonce, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { int n = 0; int64_t i = 0; __m128i lcpy[STATE_TRITS_LENGTH * 2], hcpy[STATE_TRITS_LENGTH * 2]; + uv_rwlock_rdlock(lock); for (i = 0; !incr128(lmid, hmid) && !*stopPoW; i++) { + uv_rwlock_rdunlock(lock); for (int j = 0; j < STATE_TRITS_LENGTH; j++) { lcpy[j] = lmid[j]; hcpy[j] = hmid[j]; @@ -134,7 +137,9 @@ static int64_t loop128(__m128i *lmid, seri128(lmid, hmid, n, nonce); return i * 128; } + uv_rwlock_rdlock(lock); } + uv_rwlock_rdunlock(lock); return -i * 128 - 1; } @@ -176,7 +181,8 @@ static int64_t pwork128(int8_t mid[], int mwm, int8_t nonce[], int n, - int *stopPoW) + int *stopPoW, + uv_rwlock_t *lock) { __m128i lmid[STATE_TRITS_LENGTH], hmid[STATE_TRITS_LENGTH]; para128(mid, lmid, hmid); @@ -194,22 +200,23 @@ static int64_t pwork128(int8_t mid[], hmid[offset + 4] = _mm_set_epi64x(HIGH40, HIGH41); incrN128(n, lmid, hmid); - return loop128(lmid, hmid, mwm, nonce, stopPoW); + return loop128(lmid, hmid, mwm, nonce, stopPoW, lock); } static void work_cb(uv_work_t *req) { Pwork_struct *pworkInfo = (Pwork_struct *) req->data; - pworkInfo->ret = pwork128(pworkInfo->mid, pworkInfo->mwm, pworkInfo->nonce, - pworkInfo->n, pworkInfo->stopPoW); + pworkInfo->ret = + pwork128(pworkInfo->mid, pworkInfo->mwm, pworkInfo->nonce, pworkInfo->n, + pworkInfo->stopPoW, pworkInfo->lock); - pthread_mutex_lock(pworkInfo->lock); + uv_rwlock_wrlock(pworkInfo->lock); if (pworkInfo->ret >= 0) { *pworkInfo->stopPoW = 1; /* This means this thread got the result */ pworkInfo->n = -1; } - pthread_mutex_unlock(pworkInfo->lock); + uv_rwlock_wrunlock(pworkInfo->lock); } static int8_t *tx_to_cstate(Trytes_t *tx) @@ -279,7 +286,7 @@ static bool PowSSE(void *pow_ctx) ctx->stopPoW = 0; ctx->pow_info.time = 0; ctx->pow_info.hash_count = 0; - pthread_mutex_init(&ctx->lock, NULL); + uv_rwlock_init(&ctx->lock); uv_loop_t *loop_ptr = &ctx->loop; uv_work_t *work_req = ctx->work_req; Pwork_struct *pitem = ctx->pitem; @@ -337,7 +344,8 @@ static bool PowSSE(void *pow_ctx) nonce_to_result(tx_tryte, nonce_tryte, ctx->output_trytes); fail: - /* Free memory */ + /* Free resource */ + uv_rwlock_destroy(&ctx->lock); free(c_state); freeTrobject(tx_tryte); freeTrobject(nonce_trit); diff --git a/src/pow_sse.h b/src/pow_sse.h index e84a527..160e12c 100644 --- a/src/pow_sse.h +++ b/src/pow_sse.h @@ -16,7 +16,7 @@ struct _pwork_struct { int mwm; int8_t *nonce; int n; - pthread_mutex_t *lock; + uv_rwlock_t *lock; int *stopPoW; int64_t ret; }; @@ -25,7 +25,7 @@ typedef struct _pow_sse_context PoW_SSE_Context; struct _pow_sse_context { /* Resource of computing */ - pthread_mutex_t lock; + uv_rwlock_t lock; /* Data type of libtuv */ uv_loop_t loop; uv_work_t *work_req; From 483354ab4dc791b3c4c64832fb012e911a0e5ad4 Mon Sep 17 00:00:00 2001 From: marktwtn Date: Sun, 24 Mar 2019 20:06:36 +0800 Subject: [PATCH 09/10] build: Allow libtuv to be built on other ARM boards The supported ARM boards are DE10-nano, Arrow SoCKit and Raspberry Pi 2/3 board. Close #126. --- Makefile | 12 ++---------- deps/libtuv | 2 +- docs/build-n-test.md | 2 ++ mk/submodule.mk | 7 ++++++- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 9cddd6a..0987a4e 100644 --- a/Makefile +++ b/Makefile @@ -131,14 +131,6 @@ endif OBJS := $(addprefix $(OUT)/, $(OBJS)) -# Add the libtuv PIC(position independent code) library into the object files -# if the specified hardware is CPU -CPU_PLATFORMS := $(BUILD_AVX) $(BUILD_SSE) $(BUILD_GENERIC) -ENABLE_CPU_PLATFORMS := $(findstring 1,$(CPU_PLATFORMS)) -ifeq ("$(ENABLE_CPU_PLATFORMS)","1") - OBJS += $(LIBTUV_LIBRARY) -endif - $(OUT)/test-%.o: tests/test-%.c $(LIBTUV_PATH)/include $(VECHO) " CC\t$@\n" $(Q)$(CC) -o $@ $(CFLAGS) -I $(SRC) $(LIBTUV_INCLUDE) -c -MMD -MF $@.d $< @@ -147,11 +139,11 @@ $(OUT)/%.o: $(SRC)/%.c $(LIBTUV_PATH)/include $(VECHO) " CC\t$@\n" $(Q)$(CC) -o $@ $(CFLAGS) $(LIBTUV_INCLUDE) -c -MMD -MF $@.d $< -$(OUT)/test-%: $(OUT)/test-%.o $(OBJS) +$(OUT)/test-%: $(OUT)/test-%.o $(OBJS) $(LIBTUV_LIBRARY) $(VECHO) " LD\t$@\n" $(Q)$(CC) -o $@ $^ $(LDFLAGS) -$(OUT)/libdcurl.so: $(OBJS) +$(OUT)/libdcurl.so: $(OBJS) $(LIBTUV_LIBRARY) $(VECHO) " LD\t$@\n" $(Q)$(CC) -shared -o $@ $^ $(LDFLAGS) diff --git a/deps/libtuv b/deps/libtuv index 1a0518e..e2f7bff 160000 --- a/deps/libtuv +++ b/deps/libtuv @@ -1 +1 @@ -Subproject commit 1a0518e7eada0acabffcf1ebca1d19836d028442 +Subproject commit e2f7bffda59f70fcdccc9e5e8fa37ce2a8c3ffdb diff --git a/docs/build-n-test.md b/docs/build-n-test.md index f05a873..42c2e7c 100644 --- a/docs/build-n-test.md +++ b/docs/build-n-test.md @@ -23,6 +23,8 @@ - ``BUILD_STAT``: show the statistics of the PoW information. - ``BUILD_DEBUG``: dump verbose messages internally. Build with the corresponding Sanitizer to detect software potential bugs if the value is `address`, `undefined` or `thread`. + - ``BOARD``: specify the board which builds the source code. + The supported boards are `de10nano` (DE10-nano board), `arrowsockit` (Arrow SoCKit board), `rpi2` and `rpi3` (Raspberry Pi 2/3 board). * Alternatively, you can specify conditional build as following: ```shell $ make BUILD_GPU=0 BUILD_JNI=1 BUILD_AVX=1 diff --git a/mk/submodule.mk b/mk/submodule.mk index 151d413..c867fcb 100644 --- a/mk/submodule.mk +++ b/mk/submodule.mk @@ -8,11 +8,16 @@ ifeq ($(UNAME_S),Darwin) # macOS UNAME_S := darwin endif +# For de10-nano, arrow sockit and raspberry pi 2/3 board +ifeq ($(UNAME_M),armv7l) + UNAME_M := arm +endif # libtuv related variables LIBTUV_PATH = deps/libtuv LIBTUV_INCLUDE := -I $(LIBTUV_PATH)/include LIBTUV_PLATFORM := $(UNAME_M)-$(UNAME_S) +LIBTUV_BOARD := $(BUILD_BOARD) # PIC (Position-Independent-Code) library LIBTUV_LIBRARY := $(LIBTUV_PATH)/build/$(LIBTUV_PLATFORM)/release/lib/libtuv.o @@ -20,4 +25,4 @@ $(LIBTUV_PATH)/include: git submodule update --init $(LIBTUV_PATH) $(LIBTUV_LIBRARY): - $(MAKE) -C $(LIBTUV_PATH) TUV_BUILD_TYPE=release TUV_CREATE_PIC_LIB=yes + $(MAKE) -C $(LIBTUV_PATH) TUV_BUILD_TYPE=release TUV_CREATE_PIC_LIB=yes TUV_PLATFORM=$(LIBTUV_PLATFORM) TUV_BOARD=$(LIBTUV_BOARD) From db6b6f9e4059174e5d586b6fdc1373f755d8a338 Mon Sep 17 00:00:00 2001 From: marktwtn Date: Thu, 14 Mar 2019 10:04:00 +0800 Subject: [PATCH 10/10] feat: Make thread synchronization portable Replace the thread synchronization functions with the portable libtuv APIs and remove the unused code. TODO: Make mutex of src/compat-ccurl.c portable. Close #107. --- src/dcurl.c | 32 +++++--------------------------- src/implcontext.c | 10 +++++----- src/implcontext.h | 4 ++-- src/pow_avx.c | 14 ++++++-------- src/pow_avx.h | 3 +-- src/pow_c.c | 14 ++++++-------- src/pow_c.h | 3 +-- src/pow_cl.c | 12 ++++++------ src/pow_fpga_accel.c | 1 - src/pow_sse.c | 14 ++++++-------- src/pow_sse.h | 3 +-- 11 files changed, 39 insertions(+), 71 deletions(-) diff --git a/src/dcurl.c b/src/dcurl.c index 645989d..aca0dab 100644 --- a/src/dcurl.c +++ b/src/dcurl.c @@ -17,6 +17,7 @@ #endif #include "implcontext.h" #include "trinary.h" +#include "uv.h" #if defined(ENABLE_AVX) #include "pow_avx.h" #elif defined(ENABLE_SSE) @@ -25,20 +26,9 @@ #include "pow_c.h" #endif -#ifdef __APPLE__ -#include -#else -#include -#endif - /* check whether dcurl is initialized */ static bool isInitialized = false; - -#ifdef __APPLE__ -static dispatch_semaphore_t notify; -#else -static sem_t notify; -#endif +static uv_sem_t notify; LIST_HEAD(IMPL_LIST); @@ -78,11 +68,7 @@ bool dcurl_init() ret &= registerImplContext(&PoWFPGAAccel_Context); #endif -#ifdef __APPLE__ - notify = dispatch_semaphore_create(0); -#else - sem_init(¬ify, 0, 0); -#endif + uv_sem_init(¬ify, 0); return isInitialized = ret; } @@ -118,11 +104,7 @@ int8_t *dcurl_entry(int8_t *trytes, int mwm, int threads) goto do_pow; } } -#ifdef __APPLE__ - dispatch_semaphore_wait(notify, DISPATCH_TIME_FOREVER); -#else - sem_wait(¬ify); -#endif + uv_sem_wait(¬ify); } while (1); do_pow: @@ -133,10 +115,6 @@ int8_t *dcurl_entry(int8_t *trytes, int mwm, int threads) } freePoWContext(impl, pow_ctx); exitImplContext(impl); -#ifdef __APPLE__ - dispatch_semaphore_signal(notify); -#else - sem_post(¬ify); -#endif + uv_sem_post(¬ify); return res; } diff --git a/src/implcontext.c b/src/implcontext.c index b35ed15..d09dbc3 100644 --- a/src/implcontext.c +++ b/src/implcontext.c @@ -30,21 +30,21 @@ void destroyImplContext(ImplContext *impl_ctx) bool enterImplContext(ImplContext *impl_ctx) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); if (impl_ctx->num_working_thread >= impl_ctx->num_max_thread) { - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return false; /* Access Failed */ } impl_ctx->num_working_thread++; - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return true; /* Access Success */ } void exitImplContext(ImplContext *impl_ctx) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); impl_ctx->num_working_thread--; - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); } void *getPoWContext(ImplContext *impl_ctx, int8_t *trytes, int mwm, int threads) diff --git a/src/implcontext.h b/src/implcontext.h index dfc5dd7..9a406ce 100644 --- a/src/implcontext.h +++ b/src/implcontext.h @@ -1,11 +1,11 @@ #ifndef IMPL_CTX_H_ #define IMPL_CTX_H_ -#include #include #include #include "common.h" #include "list.h" +#include "uv.h" typedef struct _impl_context ImplContext; @@ -14,7 +14,7 @@ struct _impl_context { char *description; /* Multi-thread Management */ - pthread_mutex_t lock; + uv_mutex_t lock; int bitmap; /* Used to tell which slot is available */ int num_max_thread; int num_working_thread; diff --git a/src/pow_avx.c b/src/pow_avx.c index 1e789e4..25a67f8 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -6,11 +6,9 @@ */ #include "pow_avx.h" -#include #include #include #include -#include #include "cpu-utils.h" #include "curl.h" #include "implcontext.h" @@ -610,7 +608,7 @@ static bool PoWAVX_Context_Initialize(ImplContext *impl_ctx) uv_loop_init(&ctx[i].loop); } impl_ctx->context = ctx; - pthread_mutex_init(&impl_ctx->lock, NULL); + uv_mutex_init(&impl_ctx->lock); return true; fail: @@ -643,11 +641,11 @@ static void *PoWAVX_getPoWContext(ImplContext *impl_ctx, int mwm, int threads) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); for (int i = 0; i < impl_ctx->num_max_thread; i++) { if (impl_ctx->bitmap & (0x1 << i)) { impl_ctx->bitmap &= ~(0x1 << i); - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); PoW_AVX_Context *ctx = impl_ctx->context + sizeof(PoW_AVX_Context) * i; memcpy(ctx->input_trytes, trytes, TRANSACTION_TRYTES_LENGTH); @@ -660,15 +658,15 @@ static void *PoWAVX_getPoWContext(ImplContext *impl_ctx, return ctx; } } - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return NULL; /* It should not happen */ } static bool PoWAVX_freePoWContext(ImplContext *impl_ctx, void *pow_ctx) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); impl_ctx->bitmap |= 0x1 << ((PoW_AVX_Context *) pow_ctx)->indexOfContext; - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return true; } diff --git a/src/pow_avx.h b/src/pow_avx.h index 6103e51..7818abc 100644 --- a/src/pow_avx.h +++ b/src/pow_avx.h @@ -1,13 +1,12 @@ #ifndef POW_AVX_H_ #define POW_AVX_H_ -#include #include #include -#include #include "common.h" #include "constants.h" #include "trinary.h" +#include "uv.h" typedef struct _pwork_struct Pwork_struct; diff --git a/src/pow_c.c b/src/pow_c.c index 10f6cd9..5c44e26 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -6,11 +6,9 @@ */ #include "pow_c.h" -#include #include #include #include -#include #include "cpu-utils.h" #include "curl.h" #include "implcontext.h" @@ -374,7 +372,7 @@ static bool PoWC_Context_Initialize(ImplContext *impl_ctx) uv_loop_init(&ctx[i].loop); } impl_ctx->context = ctx; - pthread_mutex_init(&impl_ctx->lock, NULL); + uv_mutex_init(&impl_ctx->lock); return true; fail: @@ -407,11 +405,11 @@ static void *PoWC_getPoWContext(ImplContext *impl_ctx, int mwm, int threads) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); for (int i = 0; i < impl_ctx->num_max_thread; i++) { if (impl_ctx->bitmap & (0x1 << i)) { impl_ctx->bitmap &= ~(0x1 << i); - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); PoW_C_Context *ctx = impl_ctx->context + sizeof(PoW_C_Context) * i; memcpy(ctx->input_trytes, trytes, TRANSACTION_TRYTES_LENGTH); ctx->mwm = mwm; @@ -423,15 +421,15 @@ static void *PoWC_getPoWContext(ImplContext *impl_ctx, return ctx; } } - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return NULL; /* It should not happen */ } static bool PoWC_freePoWContext(ImplContext *impl_ctx, void *pow_ctx) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); impl_ctx->bitmap |= 0x1 << ((PoW_C_Context *) pow_ctx)->indexOfContext; - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return true; } diff --git a/src/pow_c.h b/src/pow_c.h index 842c290..546a6db 100644 --- a/src/pow_c.h +++ b/src/pow_c.h @@ -1,13 +1,12 @@ #ifndef POW_C_H_ #define POW_C_H_ -#include #include #include -#include #include "common.h" #include "constants.h" #include "trinary.h" +#include "uv.h" typedef struct _pwork_struct Pwork_struct; diff --git a/src/pow_cl.c b/src/pow_cl.c index 838ddc0..429d94a 100644 --- a/src/pow_cl.c +++ b/src/pow_cl.c @@ -271,7 +271,7 @@ static bool PoWCL_Context_Initialize(ImplContext *impl_ctx) impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; } impl_ctx->context = ctx; - pthread_mutex_init(&impl_ctx->lock, NULL); + uv_mutex_init(&impl_ctx->lock); return true; fail: @@ -290,11 +290,11 @@ static void *PoWCL_getPoWContext(ImplContext *impl_ctx, int mwm, int threads) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); for (int i = 0; i < impl_ctx->num_max_thread; i++) { if (impl_ctx->bitmap & (0x1 << i)) { impl_ctx->bitmap &= ~(0x1 << i); - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); PoW_CL_Context *ctx = impl_ctx->context + sizeof(PoW_CL_Context) * i; memcpy(ctx->input_trytes, trytes, TRANSACTION_TRYTES_LENGTH); @@ -303,15 +303,15 @@ static void *PoWCL_getPoWContext(ImplContext *impl_ctx, return ctx; } } - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return NULL; /* It should not happen */ } static bool PoWCL_freePoWContext(ImplContext *impl_ctx, void *pow_ctx) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); impl_ctx->bitmap |= 0x1 << ((PoW_CL_Context *) pow_ctx)->indexOfContext; - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return true; } diff --git a/src/pow_fpga_accel.c b/src/pow_fpga_accel.c index 0054dd7..eb4a644 100644 --- a/src/pow_fpga_accel.c +++ b/src/pow_fpga_accel.c @@ -163,7 +163,6 @@ static bool PoWFPGAAccel_Context_Initialize(ImplContext *impl_ctx) ctx->cpow_map = (uint32_t *) (ctx->fpga_regs_map + CPOW_BASE); impl_ctx->context = ctx; - pthread_mutex_init(&impl_ctx->lock, NULL); return true; diff --git a/src/pow_sse.c b/src/pow_sse.c index e0131c6..b2e77e7 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -7,11 +7,9 @@ #include "pow_sse.h" #include -#include #include #include #include -#include #include "cpu-utils.h" #include "curl.h" #include "implcontext.h" @@ -392,7 +390,7 @@ static bool PoWSSE_Context_Initialize(ImplContext *impl_ctx) uv_loop_init(&ctx[i].loop); } impl_ctx->context = ctx; - pthread_mutex_init(&impl_ctx->lock, NULL); + uv_mutex_init(&impl_ctx->lock); return true; fail: @@ -425,11 +423,11 @@ static void *PoWSSE_getPoWContext(ImplContext *impl_ctx, int mwm, int threads) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); for (int i = 0; i < impl_ctx->num_max_thread; i++) { if (impl_ctx->bitmap & (0x1 << i)) { impl_ctx->bitmap &= ~(0x1 << i); - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); PoW_SSE_Context *ctx = impl_ctx->context + sizeof(PoW_SSE_Context) * i; memcpy(ctx->input_trytes, trytes, TRANSACTION_TRYTES_LENGTH); @@ -442,15 +440,15 @@ static void *PoWSSE_getPoWContext(ImplContext *impl_ctx, return ctx; } } - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return NULL; /* It should not happen */ } static bool PoWSSE_freePoWContext(ImplContext *impl_ctx, void *pow_ctx) { - pthread_mutex_lock(&impl_ctx->lock); + uv_mutex_lock(&impl_ctx->lock); impl_ctx->bitmap |= 0x1 << ((PoW_SSE_Context *) pow_ctx)->indexOfContext; - pthread_mutex_unlock(&impl_ctx->lock); + uv_mutex_unlock(&impl_ctx->lock); return true; } diff --git a/src/pow_sse.h b/src/pow_sse.h index 160e12c..73e5af7 100644 --- a/src/pow_sse.h +++ b/src/pow_sse.h @@ -1,13 +1,12 @@ #ifndef POW_SSE_H_ #define POW_SSE_H_ -#include #include #include -#include #include "common.h" #include "constants.h" #include "trinary.h" +#include "uv.h" typedef struct _pwork_struct Pwork_struct;