Skip to content

Commit

Permalink
Merge pull request #125 from marktwtn/trits-trytes-validation-optimize
Browse files Browse the repository at this point in the history
Optimize Trits/trytes validation
  • Loading branch information
jserv committed Mar 19, 2019
2 parents 509bb63 + 2128a6c commit 6374e02
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ else ifeq ("$(BUILD_GENERIC)","1")
CFLAGS += -DENABLE_GENERIC
endif

# Add -msse4.2 to CFLAGS when the cpu_feature helper reports "1" for SSE4_2.
# src/trinary.c selects its SSE4.2 validators with `#if defined(__SSE4_2__)`;
# this flag is presumably what makes the compiler define that macro.
# NOTE(review): cpu_feature is defined outside this view; it presumably probes
# the build host, so the resulting binary may require SSE4.2 at run time.
ifeq ("$(call cpu_feature,SSE4_2)","1")
CFLAGS += -msse4.2
endif

ifeq ("$(BUILD_GPU)","1")
include mk/opencl.mk
endif
Expand Down
11 changes: 11 additions & 0 deletions src/trinary.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
*/

#include "trinary.h"
#if defined(__SSE4_2__)
#include "trinary_sse42.h"
#endif
#include <stdint.h>
#include "constants.h"
#include "curl.h"
Expand All @@ -30,22 +33,30 @@ static bool validateTrits(Trobject_t *trits)
if (trits->type != TYPE_TRITS)
return false;

#if defined(__SSE4_2__)
return validateTrits_sse42(trits);
#else
for (int i = 0; i < trits->len; i++)
if (trits->data[i] < -1 || trits->data[i] > 1)
return false;
return true;
#endif
}

/* Check that a tryte object is well formed.
 *
 * Returns false when the object is not tagged TYPE_TRYTES, or when any of its
 * trytes->len characters is neither an upper-case letter 'A'..'Z' nor '9'.
 * Returns true otherwise (including for a length of zero).
 *
 * When the compiler defines __SSE4_2__ the character scan is delegated to
 * validateTrytes_sse42(); otherwise a plain byte-by-byte scan is used.
 */
static bool validateTrytes(Trobject_t *trytes)
{
    if (trytes->type != TYPE_TRYTES)
        return false;

#if defined(__SSE4_2__)
    return validateTrytes_sse42(trytes);
#else
    for (int pos = 0; pos < trytes->len; pos++) {
        /* '9' is the only accepted character outside the letter range */
        if (trytes->data[pos] == '9')
            continue;
        if (trytes->data[pos] < 'A' || trytes->data[pos] > 'Z')
            return false;
    }
    return true;
#endif
}

Trobject_t *initTrits(int8_t *src, int len)
Expand Down
75 changes: 75 additions & 0 deletions src/trinary_sse42.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#ifndef TRINARY_SSE42_H_
#define TRINARY_SSE42_H_

#include <nmmintrin.h>

#define BLOCK_8BIT(type) (sizeof(type) / sizeof(int8_t))

/* SSE-accelerated check that every trit lies in the range [-1, 1].
 *
 * trits: object whose `data` holds `len` signed 8-bit values.
 *
 * Returns true when all `len` values are -1, 0 or 1 (also for len == 0),
 * false as soon as an out-of-range value is found. The object's `type` field
 * is not inspected here.
 *
 * The data is processed 16 bytes at a time with unaligned loads; the
 * remaining (len % 16) bytes are checked one by one.
 */
static inline bool validateTrits_sse42(Trobject_t *trits)
{
    const int block_8bit = BLOCK_8BIT(__m128i);
    const int blockCount = (trits->len) / block_8bit;
    /* Broadcast the bounds to all 16 byte lanes. _mm_set1_epi8 avoids
     * spelling -1 as 0xFFFFFFFF, which does not fit in a signed int. */
    const __m128i posOne = _mm_set1_epi8(1);
    const __m128i negOne = _mm_set1_epi8(-1);

    /* Full 16-byte blocks */
    for (int i = 0; i < blockCount; i++) {
        const __m128i data = _mm_loadu_si128((__m128i *) (trits->data) + i);
        /* A lane becomes all-ones when its byte is > 1 or < -1 */
        const __m128i outOfRange = _mm_or_si128(_mm_cmpgt_epi8(data, posOne),
                                                _mm_cmplt_epi8(data, negOne));
        /* _mm_test_all_zeros yields 1 only when no lane was flagged */
        if (!_mm_test_all_zeros(outOfRange, outOfRange))
            return false;
    }

    /* Trailing bytes that do not fill a whole block */
    for (int i = blockCount * block_8bit; i < trits->len; i++) {
        if (trits->data[i] < -1 || trits->data[i] > 1)
            return false;
    }
    return true;
}

/* SSE4.2-accelerated check that every tryte is one of 'A'..'Z' or '9'.
 *
 * trytes: object whose `data` holds `len` characters.
 *
 * Returns true when all `len` characters are valid (also for len == 0),
 * false as soon as an invalid character is found. The object's `type` field
 * is not inspected here.
 *
 * The data is processed 16 bytes at a time with unaligned loads; the
 * remaining (len % 16) bytes are checked one by one.
 */
static inline bool validateTrytes_sse42(Trobject_t *trytes)
{
    const int block_8bit = BLOCK_8BIT(__m128i);
    /* Two inclusive (low, high) pairs: 'A'..'Z' and '9'..'9'.
     * The array is a full 16 bytes (zero padded) so that the 128-bit load
     * below never reads past the end of the object. */
    static const char range[sizeof(__m128i)] = "AZ99";
    const int rangeLen = 4;
    const __m128i pattern = _mm_loadu_si128((const __m128i *) range);

    /* Full 16-byte blocks */
    for (int i = 0; i < (trytes->len) / block_8bit; i++) {
        const __m128i src = _mm_loadu_si128((__m128i *) (trytes->data) + i);
        /* Explicit-length compare: all 16 bytes of src are examined. The
         * implicit-length form would stop at the first NUL byte and leave it
         * and everything after it unchecked, accepting invalid input.
         * The result is 1 when at least one byte is outside both ranges.
         */
        int notValid = _mm_cmpestrc(pattern, rangeLen, src, block_8bit,
                                    /* Signed byte comparison */
                                    _SIDD_SBYTE_OPS |
                                        /* Compare with the character range */
                                        _SIDD_CMP_RANGES |
                                        /* Negate the comparison result */
                                        _SIDD_MASKED_NEGATIVE_POLARITY);

        if (notValid)
            return false;
    }

    /* Trailing bytes that do not fill a whole block */
    for (int i = ((trytes->len) / block_8bit) * block_8bit; i < trytes->len;
         i++) {
        if ((trytes->data[i] < 'A' || trytes->data[i] > 'Z') &&
            trytes->data[i] != '9')
            return false;
    }
    return true;
}

#endif

0 comments on commit 6374e02

Please sign in to comment.