diff --git a/src/pow_avx.c b/src/pow_avx.c index 4502a47..0136963 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -20,7 +20,7 @@ static void transform256(__m256i *lmid, __m256i *hmid) { __m256i one = _mm256_set_epi64x(HBITS, HBITS, HBITS, HBITS); int t1, t2; - __m256i alpha, beta, gamma, delta, ngamma; + __m256i alpha, beta, delta; __m256i *lto = lmid + STATE_TRITS_LENGTH, *hto = hmid + STATE_TRITS_LENGTH; __m256i *lfrom = lmid, *hfrom = hmid; @@ -31,18 +31,12 @@ static void transform256(__m256i *lmid, __m256i *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - ngamma = _mm256_andnot_si256(gamma, one); - delta = _mm256_and_si256( - _mm256_or_si256(alpha, ngamma), - _mm256_xor_si256( - lfrom[t2], - beta)); /* (alpha | (~gamma)) & (lfrom[t2] ^ beta) */ - - - lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */ - hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, gamma), - delta); /* (alpha ^ gamma) | delta */ + delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */ + + lto[j] = _mm256_andnot_si256( + _mm256_and_si256(delta, alpha), one); /* ~(delta & alpha) */ + hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]), + delta); /* (alpha ^ hfrom[t2]) | delta */ } __m256i *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -55,15 +49,12 @@ static void transform256(__m256i *lmid, __m256i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - ngamma = _mm256_andnot_si256(gamma, one); - delta = _mm256_and_si256( - _mm256_or_si256(alpha, ngamma), - _mm256_xor_si256( - lfrom[t2], beta)); /* (alpha | (~gamma)) & (lfrom[t2] ^ beta) */ - lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */ - hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, gamma), - delta); /* (alpha ^ gamma) | delta */ + delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */ + + lto[j] = _mm256_andnot_si256( + _mm256_and_si256(delta, alpha), one); /* ~(delta & alpha) */ + hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]), + delta); /* (alpha ^ hfrom[t2]) | delta */ } } @@ -215,7 +206,7 @@ void transform256(__m256d *lmid, __m256d *hmid) { __m256d one = _mm256_set_pd(HBITS, HBITS, HBITS, HBITS); int j, r, t1, t2; - __m256d alpha, beta, gamma, delta, ngamma; + __m256d alpha, beta, delta; __m256d *lto = lmid + STATE_TRITS_LENGTH, *hto = hmid + STATE_TRITS_LENGTH; __m256d *lfrom = lmid, *hfrom = hmid; for (r = 0; r < 80; r++) { @@ -225,18 +216,12 @@ void transform256(__m256d *lmid, __m256d *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - ngamma = _mm256_andnot_pd(gamma, one); - delta = _mm256_and_pd( - _mm256_or_pd(alpha, ngamma), - _mm256_xor_pd( - lfrom[t2], - beta)); //(alpha | (~gamma)) & (lfrom[t2] ^ beta); - - - lto[j] = _mm256_andnot_pd(delta, one); //~delta; - hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, gamma), - delta); //(alpha ^ gamma) | delta; + delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */ + + lto[j] = _mm256_andnot_pd( + _mm256_and_pd(delta, alpha), one); /* ~(delta & alpha) */ + hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]), + delta); /* (alpha ^ hfrom[t2]) | delta */ } __m256d *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -250,17 +235,12 @@ void transform256(__m256d *lmid, __m256d *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - ngamma = _mm256_andnot_pd(gamma, one); - delta = _mm256_and_pd( - _mm256_or_pd(alpha, ngamma), - _mm256_xor_pd(lfrom[t2], - beta)); //(alpha | (~gamma)) & (lfrom[t2] ^ beta); - + delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - lto[j] = _mm256_andnot_pd(delta, one); //~delta; - hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, gamma), - delta); //(alpha ^ gamma) | delta; + lto[j] = _mm256_andnot_pd( + _mm256_and_pd(delta, alpha), one); /* ~(delta & alpha) */ + hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]), + delta); /* (alpha ^ hfrom[t2]) | delta */ } } diff --git a/src/pow_c.c b/src/pow_c.c index 943b6e9..ef20717 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -17,7 +17,7 @@ void transform64(uint64_t *lmid, uint64_t *hmid) { - uint64_t alpha, beta, gamma, delta; + uint64_t alpha, beta, delta; uint64_t *lfrom = lmid, *hfrom = hmid; uint64_t *lto = lmid + STATE_TRITS_LENGTH, *hto = hmid + STATE_TRITS_LENGTH; @@ -27,10 +27,9 @@ void transform64(uint64_t *lmid, uint64_t *hmid) int t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - delta = (alpha | (~gamma)) & (lfrom[t2] ^ beta); - lto[j] = ~delta; - hto[j] = (alpha ^ gamma) | delta; + delta = beta ^ lfrom[t2]; + lto[j] = ~(delta & alpha); + hto[j] = delta | (alpha ^ hfrom[t2]); } uint64_t *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -44,10 +43,9 @@ void transform64(uint64_t *lmid, uint64_t *hmid) int t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - delta = (alpha | (~gamma)) & (lfrom[t2] ^ beta); - lto[j] = ~delta; // 6 - hto[j] = (alpha ^ gamma) | delta; + delta = beta ^ lfrom[t2]; + lto[j] = ~(delta & alpha); + hto[j] = delta | (alpha ^ hfrom[t2]); } } diff --git a/src/pow_kernel.cl b/src/pow_kernel.cl index dac198a..10d5aa8 100644 --- a/src/pow_kernel.cl +++ b/src/pow_kernel.cl @@ -127,18 +127,16 @@ void transform(__global bc_trit_t* state_low, __global bc_trit_t* state_high, __private size_t id, __private size_t l_size, __private size_t n_trits) { __private size_t round, i, j, k; - __private bc_trit_t alpha, beta, gamma, delta, sp_low[3], sp_high[3]; + __private bc_trit_t alpha, beta, delta, sp_low[3], sp_high[3]; for (round = 0; round < NUMBER_OF_ROUNDS; round++) { for (i = 0; i < n_trits; i++) { j = id + i * l_size; k = j+1; alpha = state_low[INDEX[j]]; beta = state_high[INDEX[j]]; - gamma = state_high[INDEX[k]]; - delta = (alpha | (~gamma)) & (state_low[INDEX[k]] ^ beta); - - sp_low[i] = ~delta; - sp_high[i] = (alpha ^ gamma) | delta; + delta = beta ^ state_low[INDEX[k]]; + sp_low[i] = ~(delta & alpha); + sp_high[i] = delta | (alpha ^ state_high[INDEX[k]]); } barrier(CLK_LOCAL_MEM_FENCE); for (i = 0; i < n_trits; i++) { diff --git a/src/pow_sse.c b/src/pow_sse.c index d46e2e2..b2365c9 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -19,7 +19,7 @@ static void transform128(__m128i *lmid, __m128i *hmid) { int t1, t2; - __m128i alpha, beta, gamma, delta; + __m128i alpha, beta, delta; __m128i *lto = lmid + STATE_TRITS_LENGTH, *hto = hmid + STATE_TRITS_LENGTH; __m128i *lfrom = lmid, *hfrom = hmid; @@ -29,10 +29,9 @@ static void transform128(__m128i *lmid, __m128i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - delta = (alpha | (~gamma)) & (lfrom[t2] ^ beta); - lto[j] = ~delta; - hto[j] = (alpha ^ gamma) | delta; + delta = beta ^ lfrom[t2]; + lto[j] = ~(delta & alpha); + hto[j] = delta | (alpha ^ hfrom[t2]); } __m128i *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -45,10 +44,9 @@ static void transform128(__m128i *lmid, __m128i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - gamma = hfrom[t2]; - delta = (alpha | (~gamma)) & (lfrom[t2] ^ beta); - lto[j] = ~delta; - hto[j] = (alpha ^ gamma) | delta; + delta = beta ^ lfrom[t2]; + lto[j] = ~(delta & alpha); + hto[j] = delta | (alpha ^ hfrom[t2]); } }