Skip to content

Commit

Permalink
fix: rearrange bitwise operations while transform
Browse files Browse the repository at this point in the history
Although the optimized transform function produces the same results, the
assignments do not each use the same number of operations. So we reduce the
operators and rearrange some of them back to their original positions. The
performance improvement is still retained.

Fix #120
  • Loading branch information
Yu Wei Wu committed Mar 7, 2019
1 parent 85dbc30 commit 0fcc94c
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 31 deletions.
28 changes: 12 additions & 16 deletions src/pow_avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,9 @@ static void transform256(__m256i *lmid, __m256i *hmid)

alpha = lfrom[t1];
beta = hfrom[t1];
delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */

lto[j] = _mm256_andnot_si256(_mm256_and_si256(delta, alpha),
one); /* ~(delta & alpha) */
delta = _mm256_and_si256( alpha, _mm256_xor_si256(lfrom[t2], beta));
/* alpha & (lfrom[t2] ^ beta) */
lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */
hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]),
delta); /* (alpha ^ hfrom[t2]) | delta */
}
Expand All @@ -49,10 +48,9 @@ static void transform256(__m256i *lmid, __m256i *hmid)
t2 = indices[j + 1];
alpha = lfrom[t1];
beta = hfrom[t1];
delta = _mm256_xor_si256(lfrom[t2], beta); /* lfrom[t2] ^ beta */

lto[j] = _mm256_andnot_si256(_mm256_and_si256(delta, alpha),
one); /* ~(delta & alpha) */
delta = _mm256_and_si256( alpha, _mm256_xor_si256(lfrom[t2], beta));
/* alpha & (lfrom[t2] ^ beta) */
lto[j] = _mm256_andnot_si256(delta, one); /* ~delta */
hto[j] = _mm256_or_si256(_mm256_xor_si256(alpha, hfrom[t2]),
delta); /* (alpha ^ hfrom[t2]) | delta */
}
Expand Down Expand Up @@ -223,10 +221,9 @@ static void transform256(__m256d *lmid, __m256d *hmid)

alpha = lfrom[t1];
beta = hfrom[t1];
delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */

lto[j] = _mm256_andnot_pd(_mm256_and_pd(delta, alpha),
one); /* ~(delta & alpha) */
delta = _mm256_and_pd(alpha, _mm256_xor_pd(lfrom[t2], beta));
/* alpha & (lfrom[t2] ^ beta) */
lto[j] = _mm256_andnot_pd(delta, one); /* ~delta */
hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]),
delta); /* (alpha ^ hfrom[t2]) | delta */
}
Expand All @@ -242,10 +239,9 @@ static void transform256(__m256d *lmid, __m256d *hmid)

alpha = lfrom[t1];
beta = hfrom[t1];
delta = _mm256_xor_pd(lfrom[t2], beta); /* lfrom[t2] ^ beta */

lto[j] = _mm256_andnot_pd(_mm256_and_pd(delta, alpha),
one); /* ~(delta & alpha) */
delta = _mm256_and_pd(alpha, _mm256_xor_pd(lfrom[t2], beta));
/* alpha & (lfrom[t2] ^ beta) */
lto[j] = _mm256_andnot_pd(delta, one); /* ~delta */
hto[j] = _mm256_or_pd(_mm256_xor_pd(alpha, hfrom[t2]),
delta); /* (alpha ^ hfrom[t2]) | delta */
}
Expand Down
12 changes: 6 additions & 6 deletions src/pow_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ static void transform64(uint64_t *lmid, uint64_t *hmid)
int t2 = indices[j + 1];
alpha = lfrom[t1];
beta = hfrom[t1];
delta = beta ^ lfrom[t2];
lto[j] = ~(delta & alpha);
hto[j] = delta | (alpha ^ hfrom[t2]);
delta = alpha & (lfrom[t2] ^ beta);
lto[j] = ~delta;
hto[j] = (alpha ^ hfrom[t2]) | delta;
}
uint64_t *lswap = lfrom, *hswap = hfrom;
lfrom = lto;
Expand All @@ -43,9 +43,9 @@ static void transform64(uint64_t *lmid, uint64_t *hmid)
int t2 = indices[j + 1];
alpha = lfrom[t1];
beta = hfrom[t1];
delta = beta ^ lfrom[t2];
lto[j] = ~(delta & alpha);
hto[j] = delta | (alpha ^ hfrom[t2]);
delta = alpha & (lfrom[t2] ^ beta);
lto[j] = ~delta;
hto[j] = (alpha ^ hfrom[t2]) | delta;
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/pow_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ void transform(__global bc_trit_t* state_low, __global bc_trit_t* state_high,
k = j+1;
alpha = state_low[INDEX[j]];
beta = state_high[INDEX[j]];
delta = beta ^ state_low[INDEX[k]];
sp_low[i] = ~(delta & alpha);
sp_high[i] = delta | (alpha ^ state_high[INDEX[k]]);
delta = alpha & (beta ^ state_low[INDEX[k]]);
sp_low[i] = ~delta;
sp_high[i] = (alpha ^ state_high[INDEX[k]]) | delta;
}
barrier(CLK_LOCAL_MEM_FENCE);
for (i = 0; i < n_trits; i++) {
Expand Down
12 changes: 6 additions & 6 deletions src/pow_sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ static void transform128(__m128i *lmid, __m128i *hmid)
t2 = indices[j + 1];
alpha = lfrom[t1];
beta = hfrom[t1];
delta = beta ^ lfrom[t2];
lto[j] = ~(delta & alpha);
hto[j] = delta | (alpha ^ hfrom[t2]);
delta = alpha & (beta ^ lfrom[t2]);
lto[j] = ~delta;
hto[j] = (alpha ^ hfrom[t2]) | delta;
}
__m128i *lswap = lfrom, *hswap = hfrom;
lfrom = lto;
Expand All @@ -44,9 +44,9 @@ static void transform128(__m128i *lmid, __m128i *hmid)
t2 = indices[j + 1];
alpha = lfrom[t1];
beta = hfrom[t1];
delta = beta ^ lfrom[t2];
lto[j] = ~(delta & alpha);
hto[j] = delta | (alpha ^ hfrom[t2]);
delta = alpha & (beta ^ lfrom[t2]);
lto[j] = ~delta;
hto[j] = (alpha ^ hfrom[t2]) | delta;
}
}

Expand Down

0 comments on commit 0fcc94c

Please sign in to comment.