diff --git a/Doxyfile b/Doxyfile index afbcc02..feef397 100644 --- a/Doxyfile +++ b/Doxyfile @@ -17,6 +17,7 @@ INPUT = . \ src FILE_PATTERNS = dcurl.c \ dcurl.h \ + cpu-utils.h \ *.md USE_MDFILE_AS_MAINPAGE = README.md #--------------------------------------------------------------------------- @@ -27,6 +28,10 @@ INLINE_SOURCES = YES REFERENCED_BY_RELATION = YES REFERENCES_RELATION = YES #--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_STATIC = YES +#--------------------------------------------------------------------------- # Configuration options related to the LaTeX output #--------------------------------------------------------------------------- GENERATE_LATEX = NO diff --git a/src/cpu-utils.h b/src/cpu-utils.h index 52f308c..42fd084 100644 --- a/src/cpu-utils.h +++ b/src/cpu-utils.h @@ -15,6 +15,11 @@ #include #endif +/** + * @file cpu-utils.h + * @brief Utility functions for acquiring CPU information. + */ + /* On Mac OS X, define our own get_nprocs_conf() */ #if defined(__APPLE__) || defined(__FreeBSD__) #include @@ -28,7 +33,16 @@ static unsigned int get_nprocs_conf() } #endif -static inline int get_avail_nprocs() +/** + * @brief Get the number of available logical processors. + * + * Detect the logical processor number with `get_nprocs_conf()` + * or get the number with the environment variable **DCURL_NUM_CPU**. + * @return The number of available logical processors. + * If the environment variable **DCURL_NUM_CPU** is not set, return the + * **maximum logical processor number - 1**. + */ +static inline int get_avail_logic_nprocs() { size_t nproc = get_nprocs_conf() - 1; @@ -55,3 +69,62 @@ static inline int get_avail_nprocs() nproc = 1; return nproc; } + +/** + * @brief Get the thread number per physical processor. + * + * - GNU/Linux: Acquire the thread number by parsing the CPU information.
+ * - macOS: Acquire the thread number by doing the calculation of + * (logical processor number / physical processor number). + * @return The thread number per physical processor. + * @retval 1 Hyperthreading disabled. + * @retval 2 Hyperthreading enabled. + * @retval -1 Unexpected error. + */ +static inline int get_nthds_per_physic_proc() +{ + FILE *fd; + int nthread; +#if defined(__linux__) + char nthd[4]; + + fd = popen("LC_ALL=C lscpu | grep 'Thread(s) per core' | awk '{printf $4}'", + "r"); + if (fd == NULL) + return -1; + if (fgets(nthd, sizeof(nthd), fd) == NULL) + return -1; + nthread = (int) strtol(nthd, NULL, 10); + if (errno == ERANGE || nthread == 0) { + return -1; + } +#elif defined(__APPLE__) + char p_proc[4], l_proc[4]; + int physic_proc, logic_proc; + + fd = popen("sysctl hw.physicalcpu | awk '{printf $2}'", "r"); + if (fd == NULL) + return -1; + if (fgets(p_proc, sizeof(p_proc), fd) == NULL) + return -1; + fd = popen("sysctl hw.logicalcpu | awk '{printf $2}'", "r"); + if (fd == NULL) + return -1; + if (fgets(l_proc, sizeof(l_proc), fd) == NULL) + return -1; + physic_proc = (int) strtol(p_proc, NULL, 10); + if (errno == ERANGE || physic_proc == 0) { + return -1; + } + logic_proc = (int) strtol(l_proc, NULL, 10); + if (errno == ERANGE || logic_proc == 0) { + return -1; + } + + nthread = logic_proc / physic_proc; +#endif + + if (pclose(fd) == -1) + return -1; + return nthread; +} diff --git a/src/pow_avx.c b/src/pow_avx.c index dbc7325..a1c8332 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -573,7 +573,8 @@ static bool PowAVX(void *pow_ctx) static bool PoWAVX_Context_Initialize(ImplContext *impl_ctx) { - int nproc = get_avail_nprocs(); + impl_ctx->num_max_thread = get_nthds_per_physic_proc(); + int nproc = get_avail_logic_nprocs() / impl_ctx->num_max_thread; if (impl_ctx->num_max_thread <= 0 || nproc <= 0) return false; @@ -609,7 +610,7 @@ static bool PoWAVX_Context_Initialize(ImplContext *impl_ctx) impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; 
uv_loop_init(&ctx[i].loop); } - uv_set_threadpool_size(nproc); + uv_set_threadpool_size(impl_ctx->num_max_thread * nproc); impl_ctx->context = ctx; uv_mutex_init(&impl_ctx->lock); return true; @@ -693,7 +694,7 @@ ImplContext PoWAVX_Context = { .context = NULL, .description = "CPU (Intel AVX)", .bitmap = 0, - .num_max_thread = 2, + .num_max_thread = 0, .num_working_thread = 0, .initialize = PoWAVX_Context_Initialize, .destroy = PoWAVX_Context_Destroy, diff --git a/src/pow_c.c b/src/pow_c.c index 910bcdb..f0d2230 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -337,7 +337,8 @@ bool PowC(void *pow_ctx) static bool PoWC_Context_Initialize(ImplContext *impl_ctx) { - int nproc = get_avail_nprocs(); + impl_ctx->num_max_thread = get_nthds_per_physic_proc(); + int nproc = get_avail_logic_nprocs() / impl_ctx->num_max_thread; if (impl_ctx->num_max_thread <= 0 || nproc <= 0) return false; @@ -373,7 +374,7 @@ static bool PoWC_Context_Initialize(ImplContext *impl_ctx) impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; uv_loop_init(&ctx[i].loop); } - uv_set_threadpool_size(nproc); + uv_set_threadpool_size(impl_ctx->num_max_thread * nproc); impl_ctx->context = ctx; uv_mutex_init(&impl_ctx->lock); return true; @@ -456,7 +457,7 @@ ImplContext PoWC_Context = { .context = NULL, .description = "CPU (Pure C)", .bitmap = 0, - .num_max_thread = 2, + .num_max_thread = 0, .num_working_thread = 0, .initialize = PoWC_Context_Initialize, .destroy = PoWC_Context_Destroy, diff --git a/src/pow_sse.c b/src/pow_sse.c index a6a0a2a..6556a44 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -355,7 +355,8 @@ static bool PowSSE(void *pow_ctx) static bool PoWSSE_Context_Initialize(ImplContext *impl_ctx) { - int nproc = get_avail_nprocs(); + impl_ctx->num_max_thread = get_nthds_per_physic_proc(); + int nproc = get_avail_logic_nprocs() / impl_ctx->num_max_thread; if (impl_ctx->num_max_thread <= 0 || nproc <= 0) return false; @@ -391,7 +392,7 @@ static bool PoWSSE_Context_Initialize(ImplContext *impl_ctx) 
impl_ctx->bitmap = impl_ctx->bitmap << 1 | 0x1; uv_loop_init(&ctx[i].loop); } - uv_set_threadpool_size(nproc); + uv_set_threadpool_size(impl_ctx->num_max_thread * nproc); impl_ctx->context = ctx; uv_mutex_init(&impl_ctx->lock); return true; @@ -475,7 +476,7 @@ ImplContext PoWSSE_Context = { .context = NULL, .description = "CPU (Intel SSE)", .bitmap = 0, - .num_max_thread = 2, + .num_max_thread = 0, .num_working_thread = 0, .initialize = PoWSSE_Context_Initialize, .destroy = PoWSSE_Context_Destroy,