Skip to content

Commit

Permalink
feat: Allow ARM architecture to use SIMD trit validation
Browse files Browse the repository at this point in the history
The sse2neon project is added as a git submodule so that dcurl, when running
on the ARM architecture, can use SIMD acceleration without hand-writing NEON
intrinsic functions.

Close #39.
  • Loading branch information
marktwtn committed Sep 16, 2019
1 parent 7b8a253 commit 4fd73f9
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "deps/rabbitmq-c"]
path = deps/rabbitmq-c
url = https://github.com/alanxz/rabbitmq-c.git
[submodule "deps/sse2neon"]
path = deps/sse2neon
url = https://github.com/DLTcollab/sse2neon.git
1 change: 1 addition & 0 deletions deps/sse2neon
Submodule sse2neon added at d88df3
13 changes: 13 additions & 0 deletions mk/submodule.mk
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,21 @@ endif
cd $(LIBRABBITMQ_PATH)/build && \
cmake --build .

# sse2neon related variables
# SSE2NEON_PATH is the submodule checkout directory (same path as the
# deps/sse2neon entry in .gitmodules). SSE2NEON_INCLUDE adds that directory
# to the compiler include search path so sse2neon.h can be included by name.
SSE2NEON_PATH = deps/sse2neon
SSE2NEON_INCLUDE := -I $(SSE2NEON_PATH)

# Fetch the submodule on demand. The header itself is the target and the rule
# has no prerequisites, so the checkout only runs while the header is missing.
$(SSE2NEON_PATH)/sse2neon.h:
git submodule update --init $(SSE2NEON_PATH)

# Submodules
# SUBS collects the submodule paths used by this build. libtuv is always
# listed; librabbitmq is added only when BUILD_REMOTE is 1.
SUBS := $(LIBTUV_PATH)/include
ifeq ($(BUILD_REMOTE),1)
SUBS += $(LIBRABBITMQ_PATH)/librabbitmq
endif
# The sse2neon header is added only when UNAME_M is exactly "arm" or "aarch64".
# NOTE(review): the filter result is empty for an empty UNAME_M, so this test
# is also true when UNAME_M is unset -- confirm it is always defined.
ifeq ($(UNAME_M),$(filter $(UNAME_M),arm aarch64))
SUBS += $(SSE2NEON_PATH)/sse2neon.h
endif
# Submodule related objects
SUB_OBJS := $(LIBTUV_OBJS)
ifeq ($(BUILD_REMOTE),1)
Expand All @@ -69,3 +79,6 @@ SUB_INCLUDE := $(LIBTUV_INCLUDE)
ifeq ($(BUILD_REMOTE),1)
SUB_INCLUDE += $(LIBRABBITMQ_INCLUDE)
endif
ifeq ($(UNAME_M),$(filter $(UNAME_M),arm aarch64))
SUB_INCLUDE += $(SSE2NEON_INCLUDE)
endif
4 changes: 2 additions & 2 deletions src/trinary.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/

#include "trinary.h"
#if defined(__SSE4_2__)
#if defined(__SSE4_2__) || defined(__ARM_NEON)
#include "trinary_sse42.h"
#endif
#include <stdint.h>
Expand Down Expand Up @@ -35,7 +35,7 @@ static bool validateTrits(Trobject_t *trits)
if (trits->type != TYPE_TRITS)
return false;

#if defined(__SSE4_2__)
#if defined(__SSE4_2__) || defined(__ARM_NEON)
return validateTrits_sse42(trits);
#endif
for (int i = 0; i < trits->len; i++)
Expand Down
10 changes: 10 additions & 0 deletions src/trinary_sse42.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,16 @@
#ifndef TRINARY_SSE42_H_
#define TRINARY_SSE42_H_

/* Builds with SSE4.2 enabled take the intrinsics from <nmmintrin.h>. */
#if defined(__SSE4_2__)
#include <nmmintrin.h>
#endif
#include "constants.h"
/* Builds with NEON enabled pull in sse2neon.h (the deps/sse2neon submodule). */
#if defined(__ARM_NEON)
#include "sse2neon.h"
#endif
/* Stop the build early when neither instruction set is enabled. */
#if !defined(__SSE4_2__) && !defined(__ARM_NEON)
#error "The hardware architecture should support SSE4.2 or NEON instruction."
#endif

#define BLOCK_8BIT(type) (sizeof(type) / sizeof(int8_t))
#define BYTE_OF_128BIT 16
Expand Down Expand Up @@ -66,6 +74,7 @@ static inline bool validateTrits_sse42(Trobject_t *trits)
return true;
}

#if defined(__SSE4_2__)
static inline bool validateTrytes_sse42(Trobject_t *trytes)
{
const int block_8bit = BLOCK_8BIT(__m128i);
Expand Down Expand Up @@ -452,5 +461,6 @@ static inline Trobject_t *trits_from_trytes_sse42(Trobject_t *trytes)

return trits;
}
#endif // #if defined(__SSE4_2__)

#endif

0 comments on commit 4fd73f9

Please sign in to comment.