diff --git a/src/pow_avx.c b/src/pow_avx.c index 83aba86..faa5cec 100644 --- a/src/pow_avx.c +++ b/src/pow_avx.c @@ -31,10 +31,9 @@ static void transform256(__m256i *lmid, __m256i *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_si256(_mm256_and_si256(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_si256( alpha, _mm256_xor_si256(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */ hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } @@ -49,10 +48,9 @@ static void transform256(__m256i *lmid, __m256i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_si256(_mm256_and_si256(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_si256( alpha, _mm256_xor_si256(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */ hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } @@ -223,10 +221,9 @@ static void transform256(__m256d *lmid, __m256d *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_pd(_mm256_and_pd(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_pd(alpha, _mm256_xor_pd(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_pd(delta, one); /* ~delta */ hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } @@ -242,10 +239,9 @@ static void transform256(__m256d *lmid, __m256d *hmid) alpha = lfrom[t1]; beta = hfrom[t1]; - delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */ - - lto[j] = _mm256_andnot_pd(_mm256_and_pd(delta, alpha), - one); /* ~(delta & alpha) */ + delta = _mm256_and_pd(alpha, _mm256_xor_pd(lfrom[t2], beta)); + /* alpha & (lfrom[t2] ^ beta) */ + lto[j] = _mm256_andnot_pd(delta, one); /* ~delta */ hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]), delta); /* (alpha ^ hfrom[t2]) | delta */ } diff --git a/src/pow_c.c b/src/pow_c.c index cbf2d81..9f11fd2 100644 --- a/src/pow_c.c +++ b/src/pow_c.c @@ -27,9 +27,9 @@ static void transform64(uint64_t *lmid, uint64_t *hmid) int t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (lfrom[t2] ^ beta); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } uint64_t *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -43,9 +43,9 @@ static void transform64(uint64_t *lmid, uint64_t *hmid) int t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (lfrom[t2] ^ beta); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } } diff --git a/src/pow_kernel.cl b/src/pow_kernel.cl index 10d5aa8..32b7946 100644 --- a/src/pow_kernel.cl +++ b/src/pow_kernel.cl @@ -134,9 +134,9 @@ void transform(__global bc_trit_t* state_low, __global bc_trit_t* state_high, k = j+1; alpha = state_low[INDEX[j]]; beta = state_high[INDEX[j]]; - delta = beta ^ state_low[INDEX[k]]; - sp_low[i] = ~(delta & alpha); - sp_high[i] = delta | (alpha ^ state_high[INDEX[k]]); + delta = alpha & (beta ^ state_low[INDEX[k]]); + sp_low[i] = ~delta; + sp_high[i] = (alpha ^ state_high[INDEX[k]]) | delta; } barrier(CLK_LOCAL_MEM_FENCE); for (i = 0; i < n_trits; i++) { diff --git a/src/pow_sse.c b/src/pow_sse.c index eaea2f1..da0dc1e 100644 --- a/src/pow_sse.c +++ b/src/pow_sse.c @@ -29,9 +29,9 @@ static void transform128(__m128i *lmid, __m128i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (beta ^ lfrom[t2]); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } __m128i *lswap = lfrom, *hswap = hfrom; lfrom = lto; @@ -44,9 +44,9 @@ static void transform128(__m128i *lmid, __m128i *hmid) t2 = indices[j + 1]; alpha = lfrom[t1]; beta = hfrom[t1]; - delta = beta ^ lfrom[t2]; - lto[j] = ~(delta & alpha); - hto[j] = delta | (alpha ^ hfrom[t2]); + delta = alpha & (beta ^ lfrom[t2]); + lto[j] = ~delta; + hto[j] = (alpha ^ hfrom[t2]) | delta; } }