Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify PoW implementation context #59

Merged
merged 21 commits into from
Aug 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
222e86a
Prototype of new interface design is introduced
furuame Aug 10, 2018
15dabf7
The linked list for ImplContext is introduced
furuame Aug 12, 2018
c341923
Reduce the number of working thread when leaving
furuame Aug 12, 2018
9f3de39
Unit test for pow_sse is updated for new interface
furuame Aug 12, 2018
bb845d4
Fix the improper initialization of SSE context
furuame Aug 13, 2018
5cae697
Update the multi-thread pow test for new interface
furuame Aug 13, 2018
9e49f23
The use of semaphore boosts the performance
furuame Aug 13, 2018
c7efd7d
Update the C implementation for new interface
furuame Aug 13, 2018
b5f29c0
Update the AVX implementation for new interface
furuame Aug 13, 2018
c02b45e
Let dcurl configure every GPU when initializing
furuame Aug 14, 2018
32158a9
Update the OpenCL implementation for new interface
furuame Aug 14, 2018
f12c41b
Add comment
furuame Aug 14, 2018
14ff0af
Remove duplicate multi-pow test case
furuame Aug 14, 2018
12c0702
Free the allocated memory and handle the exception
furuame Aug 14, 2018
eb618db
Make the test case more robust
furuame Aug 15, 2018
ddd977f
Let constants be referred in the unified header
furuame Aug 16, 2018
364500b
Replace numbers with mnemonic enum
furuame Aug 16, 2018
365c712
Free the memory used to query OpenCL platforms
furuame Aug 16, 2018
04c73d5
Replace the number (0/1) with boolean type
furuame Aug 16, 2018
ffd47c7
Handle the uninitialized situation
furuame Aug 16, 2018
dd26cba
Rename some varaible to avoid ambiguity
furuame Aug 16, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,28 +45,26 @@ endif

TESTS = \
trinary \
curl
curl \
dcurl \
multi-pow

ifeq ("$(BUILD_AVX)","1")
TESTS += \
pow_avx \
multi_pow_cpu
pow_avx
else
ifeq ("$(BUILD_SSE)","1")
TESTS += \
pow_sse \
multi_pow_cpu
pow_sse
else
TESTS += \
pow_c \
multi_pow_cpu
pow_c
endif
endif

ifeq ("$(BUILD_GPU)","1")
TESTS += \
pow_cl \
multi_pow_gpu
pow_cl
endif

ifeq ("$(BUILD_COMPAT)", "1")
Expand All @@ -85,7 +83,8 @@ OBJS = \
curl.o \
constants.o \
trinary.o \
dcurl.o
dcurl.o \
implcontext.o

ifeq ("$(BUILD_AVX)","1")
OBJS += pow_avx.o
Expand Down
133 changes: 88 additions & 45 deletions src/clcontext.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,29 @@

#include "clcontext.h"
#include <stdio.h>
#include <stdbool.h>
#include "pearl.cl.h"
#include "constants.h"

static int init_cl_devices(CLContext *ctx)
static bool init_cl_devices(CLContext *ctx)
{
cl_uint num_platform = 0;
cl_int errno;
cl_platform_id *platform;

errno = clGetPlatformIDs(0, NULL, &num_platform);

if (errno != CL_SUCCESS)
return 0; /* Cannot get # of OpenCL platform */

/* We only need one Platform */
platform = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platform);
clGetPlatformIDs(num_platform, platform, NULL);

/* Get Device IDs */
cl_uint platform_num_device;
if (clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, 1, &ctx->device,
&platform_num_device) != CL_SUCCESS)
return 0; /* Failed to get OpenCL Device IDs in platform */

/* Create OpenCL context */
ctx->context =
(cl_context) clCreateContext(NULL, 1, &ctx->device, NULL, NULL, &errno);
if (errno != CL_SUCCESS)
return 0; /* Failed to create OpenCL Context */
return false; /* Failed to create OpenCL Context */

/* Get Device Info (num_cores) */
if (CL_SUCCESS != clGetDeviceInfo(ctx->device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(cl_uint), &ctx->num_cores, NULL))
return 0; /* Failed to get num_cores of GPU */
return false; /* Failed to get num_cores of GPU */

/* Get Device Info (max_memory) */
if (CL_SUCCESS != clGetDeviceInfo(ctx->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_ulong), &ctx->max_memory, NULL))
return 0; /* Failed to get Max memory of GPU */
return false; /* Failed to get Max memory of GPU */

/* Get Device Info (num work group) */
if (CL_SUCCESS !=
Expand All @@ -55,14 +40,12 @@ static int init_cl_devices(CLContext *ctx)
/* Create Command Queue */
ctx->cmdq = clCreateCommandQueue(ctx->context, ctx->device, 0, &errno);
if (errno != CL_SUCCESS)
return 0; /* Failed to create command queue */
return false; /* Failed to create command queue */

free(platform);
return 1;
return true;
}


static int init_cl_program(CLContext *ctx)
static bool init_cl_program(CLContext *ctx)
{
unsigned char *source_str = pearl_cl;
size_t source_size = pearl_cl_len;
Expand All @@ -72,29 +55,30 @@ static int init_cl_program(CLContext *ctx)
ctx->context, ctx->kernel_info.num_src, (const char **) &source_str,
(const size_t *) &source_size, &errno);
if (CL_SUCCESS != errno)
return 0; /* Failed to create OpenCL program */
return false; /* Failed to create OpenCL program */

errno =
clBuildProgram(ctx->program, 1, &ctx->device, "-Werror", NULL, NULL);
if (CL_SUCCESS != errno)
return 0; /* Failed to build OpenCL program */
return false; /* Failed to build OpenCL program */

return 1;
return true;
}

int init_cl_kernel(CLContext *ctx, char **kernel_name)
static bool init_cl_kernel(CLContext *ctx)
{
char *kernel_name[] = {"init", "search", "finalize"};
cl_int errno;

for (int i = 0; i < ctx->kernel_info.num_kernels; i++) {
ctx->kernel[i] = clCreateKernel(ctx->program, kernel_name[i], &errno);
if (CL_SUCCESS != errno)
return 0; /* Failed to create OpenCL kernel */
return false; /* Failed to create OpenCL kernel */
}
return 1;
return true;
}

int init_cl_buffer(CLContext *ctx)
static bool init_cl_buffer(CLContext *ctx)
{
cl_ulong mem = 0, max_mem = 0;
cl_int errno;
Expand All @@ -113,35 +97,94 @@ int init_cl_buffer(CLContext *ctx)
/* Check Memory bound */
max_mem += mem;
if (max_mem >= ctx->max_memory)
return 0; /* GPU Memory is not enough */
return false; /* GPU Memory is not enough */

/* Create OpenCL Buffer */
ctx->buffer[i] =
clCreateBuffer(ctx->context, ctx->kernel_info.buffer_info[i].flags,
mem, NULL, &errno);
if (CL_SUCCESS != errno)
return 0; /* Failed to create OpenCL Memory Buffer */
return false; /* Failed to create OpenCL Memory Buffer */

/* Set Kernel Arguments */
for (int j = 0; j < ctx->kernel_info.num_kernels; j++) {
if (CL_SUCCESS != clSetKernelArg(ctx->kernel[j], i, sizeof(cl_mem),
(void *) &ctx->buffer[i]))
return 0; /* Failed to set OpenCL kernel arguments */
return false; /* Failed to set OpenCL kernel arguments */
}
}
return 1;
return true;
}

/*
 * Describe every kernel buffer for this context and then create them.
 *
 * Each buffer_info slot, addressed by its INDEX_OF_* constant, receives a
 * positional BufferInfo initializer: a byte count, a CL_MEM_* access flag
 * and, for some slots, a third value of 2.
 * NOTE(review): the meaning of that third field is defined by BufferInfo,
 * which is not visible here - confirm against clcontext.h.
 *
 * Returns whatever init_cl_buffer() reports for the filled-in context.
 */
static bool init_BufferInfo(CLContext *ctx)
{
    const BufferInfo layout[] = {
        [INDEX_OF_TRIT_HASH] = {sizeof(char) * HASH_LENGTH, CL_MEM_WRITE_ONLY},
        [INDEX_OF_MID_LOW] = {sizeof(int64_t) * STATE_LENGTH,
                              CL_MEM_READ_WRITE, 2},
        [INDEX_OF_MID_HIGH] = {sizeof(int64_t) * STATE_LENGTH,
                               CL_MEM_READ_WRITE, 2},
        [INDEX_OF_STATE_LOW] = {sizeof(int64_t) * STATE_LENGTH,
                                CL_MEM_READ_WRITE, 2},
        [INDEX_OF_STATE_HIGH] = {sizeof(int64_t) * STATE_LENGTH,
                                 CL_MEM_READ_WRITE, 2},
        [INDEX_OF_MWM] = {sizeof(size_t), CL_MEM_READ_ONLY},
        [INDEX_OF_FOUND] = {sizeof(char), CL_MEM_READ_WRITE},
        [INDEX_OF_NONCE_PROBE] = {sizeof(int64_t), CL_MEM_READ_WRITE, 2},
        [INDEX_OF_LOOP_COUNT] = {sizeof(size_t), CL_MEM_READ_ONLY},
    };
    const size_t slot_count = sizeof(layout) / sizeof(layout[0]);
    BufferInfo *slots = ctx->kernel_info.buffer_info;

    /* Copy the whole table into the context, one slot at a time. */
    for (size_t slot = 0; slot < slot_count; slot++)
        slots[slot] = layout[slot];

    return init_cl_buffer(ctx);
}

/*
 * Attach `device` to `ctx`, record the fixed counts of buffers (9),
 * kernels (3) and program sources (1), then run device setup followed by
 * program setup.
 *
 * Returns true only when both init_cl_devices() and init_cl_program()
 * succeed; program setup is not attempted once device setup has failed.
 */
static bool set_clcontext(CLContext *ctx, cl_device_id device)
{
    ctx->device = device;

    ctx->kernel_info.num_src = 1;
    ctx->kernel_info.num_kernels = 3;
    ctx->kernel_info.num_buffers = 9;

    if (!init_cl_devices(ctx))
        return false;

    return init_cl_program(ctx);
}

int init_clcontext(CLContext **ctx)
int init_clcontext(CLContext *ctx)
{
*ctx = (CLContext *) malloc(sizeof(CLContext));
int ctx_idx = 0;

if (!(*ctx))
return 0;
cl_uint num_platform = 0;
cl_platform_id *platform = NULL;

(*ctx)->kernel_info.num_buffers = 9;
(*ctx)->kernel_info.num_kernels = 3;
(*ctx)->kernel_info.num_src = 1;
/* Get the platform */
clGetPlatformIDs(0, NULL, &num_platform);
platform = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platform);
if (!platform) return 0;
clGetPlatformIDs(num_platform, platform, NULL);

return init_cl_devices(*ctx) && init_cl_program(*ctx);
cl_uint num_devices = 0;
cl_device_id *devices = NULL;

/* Iterate the platform list and get its devices */
for (int i = 0; i < num_platform; i++) {
clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
devices = (cl_device_id *) malloc(sizeof(cl_device_id) * num_devices);
if (!devices) goto leave;
clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
for (int j = 0; j < num_devices; j++, ctx_idx++) {
int ret = 1;
ret &= set_clcontext(&ctx[ctx_idx], devices[j]);
ret &= init_cl_kernel(&ctx[ctx_idx]);
ret &= init_BufferInfo(&ctx[ctx_idx]);
if (!ret) {
free(devices);
goto leave;
}
}
free(devices);
}
leave:
free(platform);
return ctx_idx;
}
25 changes: 21 additions & 4 deletions src/clcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ typedef struct {
size_t num_src;
} KernelInfo;

/* Every GPU device has its own CLContext */
typedef struct {
cl_uint num_devices;
cl_device_id device;
cl_command_queue cmdq;
cl_mem buffer[MAX_NUM_BUFFERS];
Expand All @@ -43,9 +43,26 @@ typedef struct {
KernelInfo kernel_info;
} CLContext;

int init_clcontext(CLContext **ctx);
int init_cl_kernel(CLContext *ctx, char **kernel_name);
int init_cl_buffer(CLContext *ctx);
/*
 * Positions of the individual buffers inside KernelInfo.buffer_info[].
 * The values are consecutive and start at zero.
 */
enum {
    INDEX_OF_TRIT_HASH = 0,
    INDEX_OF_MID_LOW = 1,
    INDEX_OF_MID_HIGH = 2,
    INDEX_OF_STATE_LOW = 3,
    INDEX_OF_STATE_HIGH = 4,
    INDEX_OF_MWM = 5,
    INDEX_OF_FOUND = 6,
    INDEX_OF_NONCE_PROBE = 7,
    INDEX_OF_LOOP_COUNT = 8
};

/*
 * Positions of the OpenCL kernels, numbered consecutively from zero.
 * NOTE(review): the order appears to mirror the "init", "search",
 * "finalize" name list used when the kernels are created - confirm.
 */
enum {
    INDEX_OF_KERNEL_INIT = 0,
    INDEX_OF_KERNEL_SEARCH = 1,
    INDEX_OF_KERNEL_FINALIZE = 2,
};

/* Returns the number of GPU devices for which a CLContext was initialized */
int init_clcontext(CLContext *ctx);

#define KERNEL_PATH "./src/pow_kernel.cl"

Expand Down
5 changes: 5 additions & 0 deletions src/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
#define Depth 3
#define Radix 3

/*
 * Lengths, measured in trits, shared by all PoW implementations.
 *
 * The derived lengths are wrapped in parentheses so that each macro expands
 * to a single operand: without them, an expression such as
 * `n / STATE_LENGTH` would expand to `n / 3 * HASH_LENGTH` and give the
 * wrong result.
 */
#define HASH_LENGTH 243                 // trits
#define NONCE_LENGTH 81                 // trits
#define STATE_LENGTH (3 * HASH_LENGTH)  // trits
#define TRANSACTION_LENGTH (2673 * 3)   // trits

#define SignatureMessageFragmentTrinaryOffset 0
#define SignatureMessageFragmentTrinarySize 6561
#define AddressTrinaryOffset \
Expand Down
Loading