diff --git a/.gitmodules b/.gitmodules
index 400b111..1e4b6d6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "deps/rabbitmq-c"]
 	path = deps/rabbitmq-c
 	url = https://github.com/alanxz/rabbitmq-c.git
+[submodule "deps/sse2neon"]
+	path = deps/sse2neon
+	url = https://github.com/DLTcollab/sse2neon.git
diff --git a/deps/sse2neon b/deps/sse2neon
new file mode 160000
index 0000000..d88df33
--- /dev/null
+++ b/deps/sse2neon
@@ -0,0 +1 @@
+Subproject commit d88df33bb849188deec044f98d998bb6ec795803
diff --git a/mk/submodule.mk b/mk/submodule.mk
index a027d74..566c029 100644
--- a/mk/submodule.mk
+++ b/mk/submodule.mk
@@ -54,11 +54,23 @@ endif
 	cd $(LIBRABBITMQ_PATH)/build && \
 	cmake --build .
 
+# sse2neon related variables
+SSE2NEON_PATH = deps/sse2neon
+SSE2NEON_INCLUDE := -I $(SSE2NEON_PATH)
+
+$(SSE2NEON_PATH)/sse2neon.h:
+	git submodule update --init $(SSE2NEON_PATH)
+
 # Submodules
 SUBS := $(LIBTUV_PATH)/include
 ifeq ($(BUILD_REMOTE),1)
     SUBS += $(LIBRABBITMQ_PATH)/librabbitmq
 endif
+# Fetch sse2neon only when UNAME_M is arm or aarch64. Comparing the $(filter)
+# result with the empty string keeps an unset UNAME_M from counting as a match.
+ifneq ($(filter arm aarch64,$(UNAME_M)),)
+    SUBS += $(SSE2NEON_PATH)/sse2neon.h
+endif
 # Submodule related objects
 SUB_OBJS := $(LIBTUV_OBJS)
 ifeq ($(BUILD_REMOTE),1)
@@ -69,3 +81,7 @@ SUB_INCLUDE := $(LIBTUV_INCLUDE)
 ifeq ($(BUILD_REMOTE),1)
     SUB_INCLUDE += $(LIBRABBITMQ_INCLUDE)
 endif
+# Same ARM-only guard as for SUBS: an empty UNAME_M must not match.
+ifneq ($(filter arm aarch64,$(UNAME_M)),)
+    SUB_INCLUDE += $(SSE2NEON_INCLUDE)
+endif
diff --git a/src/trinary.c b/src/trinary.c
index ad75e85..17187cc 100644
--- a/src/trinary.c
+++ b/src/trinary.c
@@ -7,7 +7,7 @@
  */
 
 #include "trinary.h"
-#if defined(__SSE4_2__)
+#if defined(__SSE4_2__) || defined(__ARM_NEON)
 #include "trinary_sse42.h"
 #endif
 #include
@@ -35,7 +35,7 @@ static bool validateTrits(Trobject_t *trits)
     if (trits->type != TYPE_TRITS)
         return false;
 
-#if defined(__SSE4_2__)
+#if defined(__SSE4_2__) || defined(__ARM_NEON)
     return validateTrits_sse42(trits);
 #endif
     for (int i = 0; i < trits->len; i++)
diff --git a/src/trinary_sse42.h b/src/trinary_sse42.h
index 9f4684f..e379d06 100644
--- a/src/trinary_sse42.h
+++ b/src/trinary_sse42.h
@@ -9,8 +9,16 @@
 #ifndef TRINARY_SSE42_H_
 #define TRINARY_SSE42_H_
 
+#if defined(__SSE4_2__)
 #include
+#endif
 #include "constants.h"
+#if defined(__ARM_NEON)
+#include "sse2neon.h"
+#endif
+#if !defined(__SSE4_2__) && !defined(__ARM_NEON)
+#error "The hardware architecture should support SSE4.2 or NEON instruction."
+#endif
 
 #define BLOCK_8BIT(type) (sizeof(type) / sizeof(int8_t))
 #define BYTE_OF_128BIT 16
@@ -66,6 +74,7 @@ static inline bool validateTrits_sse42(Trobject_t *trits)
     return true;
 }
 
+#if defined(__SSE4_2__)
 static inline bool validateTrytes_sse42(Trobject_t *trytes)
 {
     const int block_8bit = BLOCK_8BIT(__m128i);
@@ -452,5 +461,6 @@ static inline Trobject_t *trits_from_trytes_sse42(Trobject_t *trytes)
 
     return trits;
 }
+#endif // #if defined(__SSE4_2__)
 
 #endif