Skip to content

Commit

Permalink
Optimize squaring of big integers
Browse files Browse the repository at this point in the history
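The routine for squaring a big integer lacked some of the optimizations
that the multiplication routine already had. It now skips the inner loop
when the current digit is zero, and the squaring path is only taken when
the operand has more than one digit.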
  • Loading branch information
bjorng committed Aug 18, 2023
1 parent 8cab1c6 commit 4253b86
Showing 1 changed file with 46 additions and 37 deletions.
83 changes: 46 additions & 37 deletions erts/emulator/beam/big.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,44 +692,53 @@ static dsize_t I_sqr(ErtsDigit* x, dsize_t xl, ErtsDigit* r)
*x = 0;

while(xl--) {
ErtsDigit* y;
ErtsDigit y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0;
ErtsDigit b0, b1;
ErtsDigit z0, z1, z2;
ErtsDigit t;
dsize_t y_l = xl;

d = *x;
x++;
y = x;
s = r;

DMUL(d, d, b1, b0);
DSUMc(*s, b0, y_3, t);
*s++ = t;
z1 = b1;
while(y_l--) {
DMUL(d, *y, b1, b0);
y++;
DSUMc(b0, b0, y_0, z0);
DSUMc(z0, z1, y_2, z2);
DSUMc(*s, z2, y_3, t);
*s++ = t;
DSUMc(b1, b1, y_1, z1);
}
z0 = y_0;
DSUMc(z0, z1, y_2, z2);
DSUMc(*s, z2, y_3, t);
*s = t;
if (xl != 0) {
s++;
t = (y_1+y_2+y_3);
*s = t;
r += 2;
}
else {
ASSERT((y_1+y_2+y_3) == 0);
}
d = *x++;
s = r;

if (d == 0) {
s += y_l + 1;
if (xl != 0) {
*++s = 0;
r += 2;
}
} else {
ErtsDigit* y;
ErtsDigit y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0;
ErtsDigit b0, b1;
ErtsDigit z0, z1, z2;
ErtsDigit t;

y = x;

DMUL(d, d, b1, b0);
DSUMc(*s, b0, y_3, t);
*s++ = t;
z1 = b1;
while(y_l--) {
DMUL(d, *y, b1, b0);
y++;
DSUMc(b0, b0, y_0, z0);
DSUMc(z0, z1, y_2, z2);
DSUMc(*s, z2, y_3, t);
*s++ = t;
DSUMc(b1, b1, y_1, z1);
}
z0 = y_0;
DSUMc(z0, z1, y_2, z2);
DSUMc(*s, z2, y_3, t);
*s = t;
if (xl != 0) {
s++;
t = (y_1+y_2+y_3);
*s = t;
r += 2;
}
else {
ASSERT((y_1+y_2+y_3) == 0);
}
}
}
if (*s == 0)
return (s - r0);
Expand All @@ -749,7 +758,7 @@ static dsize_t I_mul_karatsuba(ErtsDigit* x, dsize_t xl, ErtsDigit* y,

if (yl < 16) {
/* Use the basic algorithm. */
if (x == y) {
if (x == y && xl > 1) {
ASSERT(xl == yl);
return I_sqr(x, xl, r);
} else {
Expand Down

0 comments on commit 4253b86

Please sign in to comment.