Skip to content

Commit

Permalink
optimized the fallback code for uint320 multiplication
Browse files — browse the repository at this point in the history
  • Loading branch information
mrdcvlsc committed Apr 6, 2022
1 parent 8bff93f commit 5163822
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 27 deletions.
1 change: 0 additions & 1 deletion tests/multiplication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ int main() {
ASSERT_UINT512(MAXMAX,CMAXMAX,"MAX*MAX");
ASSERT_UINT512(NUM1NUM2,CNUM1NUM2,"NUM1*NUM2");


// SUMMARY OF RESULTS
size_t failed_cnt = 0;
for(auto e : TEST_RESULTS) {
Expand Down
51 changes: 25 additions & 26 deletions uint320.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,30 +349,31 @@ uint320 uint320::operator*(const uint320& mr) const {

#if(__x86_64 || __x86_64__ || __amd64 || __amd64__ || __aarch64__ || __aarch64)
#if(_MSC_VER || _PURE_CPP)

__uint128_t __uint128_product[UINT320LIMBS+1] = {0,0,0,0,0,0};

for(size_t i=0; i<UINT320LIMBS; ++i) {
for(size_t j=0; j<UINT320LIMBS-i; ++j) {


// index product
__uint128_t prd = (__uint128_t)limbs[j] * (__uint128_t)mr.limbs[i];

// low part add
__uint128_product[i+j] += ((prd << UINT64BITS) >> UINT64BITS);
__uint128_product[i+j+1] += (prd >> UINT64BITS); // carry

for(size_t k=j+i; k<UINT320LIMBS; ++k) {
if((ulongint)(__uint128_product[k] >> UINT64BITS)) { // if carry again in carry
__uint128_product[k+1] += __uint128_product[k] >> UINT64BITS;
__uint128_product[k] = (__uint128_product[k] << UINT64BITS) >> UINT64BITS;
}
else if(k>j+i){
break;
}
}

// high part add
__uint128_product[i+j+1] += (prd >> UINT64BITS); // high-carry

// last carry
__uint128_product[i+j+1] += __uint128_product[i+j] >> UINT64BITS;
__uint128_product[i+j] = (__uint128_product[i+j] << 64) >> 64;
}
}

for(size_t i=0; i<UINT320LIMBS; ++i) {
pd.limbs[i] = __uint128_product[i];
}

#elif ((__clang__ || __GNUC__ || __GNUG__ || __MINGW64__) && (__aarch64__ || __aarch64))
#ifndef _HIDE_WARNING
#warning using GCC inline asm, please enable optimization flag, recomended : -O2, to enable use C++ implementation instead, enable the -D_PURE_CPP flag.
Expand All @@ -382,20 +383,19 @@ uint320 uint320::operator*(const uint320& mr) const {

for(size_t i=0; i<UINT320LIMBS; ++i) {
for(size_t j=0; j<UINT320LIMBS-i; ++j) {


// index product
__uint128_t prd = (__uint128_t)limbs[j] * (__uint128_t)mr.limbs[i];

// low part add
__uint128_product[i+j] += ((prd << UINT64BITS) >> UINT64BITS);
__uint128_product[i+j+1] += (prd >> UINT64BITS); // carry

for(size_t k=j+i; k<UINT320LIMBS; ++k) {
if((ulongint)(__uint128_product[k] >> UINT64BITS)) { // if carry again in carry
__uint128_product[k+1] += __uint128_product[k] >> UINT64BITS;
__uint128_product[k] = (__uint128_product[k] << UINT64BITS) >> UINT64BITS;
}
else if(k>j+i){
break;
}
}

// high part add
__uint128_product[i+j+1] += (prd >> UINT64BITS); // high-carry

// last carry
__uint128_product[i+j+1] += __uint128_product[i+j] >> UINT64BITS;
__uint128_product[i+j] = (__uint128_product[i+j] << 64) >> 64;
}
}

Expand Down Expand Up @@ -511,9 +511,8 @@ uint320 uint320::operator*(const uint320& mr) const {
[mc4]"m"(limbs[4])
: "rax","rdx","cc"
);

#else
#error uint320 is not supported on this compiler
#error uint320 is not supported on this compiler
#endif
#else
#error uint320 is not supported on 32-bit computers (x86 architectures)
Expand Down

0 comments on commit 5163822

Please sign in to comment.