Skip to content

Commit

Permalink
Update sm9_z256_alg.c
Browse files Browse the repository at this point in the history
  • Loading branch information
guanzhi committed Mar 27, 2024
1 parent b476ef7 commit 6de0e02
Showing 1 changed file with 30 additions and 12 deletions.
42 changes: 30 additions & 12 deletions src/sm9_z256_alg.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ const sm9_z256_t SM9_Z256_NEG_N = {0x1a911e63296130db, 0xb60d6cb4e7157411, 0x29f
const sm9_z256_t SM9_Z256_N_MINUS_ONE = {0xe56ee19cd69ecf24, 0x49f2934b18ea8bee, 0xd603ab4ff58ec744, 0xb640000002a3a6f1};


// p - 2 = b640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457b
// Stored as four 64-bit limbs, least-significant limb first.
// Used as the exponent for inversion in Fp: a^(p-2) = a^-1.
const sm9_z256_t SM9_Z256_P_MINUS_TWO = {0xe56f9b27e351457b, 0x21f2934b1a7aeedb, 0xd603ab4ff58ec745, 0xb640000002a3a6f1};


// P1.X 0x93DE051D62BF718FF5ED0704487D01D6E1E4086909DC3280E8C4E4817C66DDDD
// P1.Y 0x21FE8DDA4F21E607631065125C395BBC1C1C00CBFA6024350C464CD70A3EA616
const SM9_Z256_POINT _SM9_Z256_P1 = {
Expand Down Expand Up @@ -491,6 +496,8 @@ void sm9_z256_fp_neg(sm9_z256_t r, const sm9_z256_t a)
}
#endif

// (w0,w1) = a*b + c + d
#if 0
void sm9_u64_mul_add(uint64_t *w0, uint64_t *w1,
const uint64_t a, const uint64_t b, const uint64_t c, const uint64_t d)
{
Expand Down Expand Up @@ -519,7 +526,7 @@ void sm9_u64_mul_add(uint64_t *w0, uint64_t *w1,
for (i = 0; i < 2; i++) {
r[i] = (s[2 * i + 1] << 32) | s[2 * i];
}

r[0] += c;
if (r[0] < c) {
r[1]++;
Expand All @@ -528,10 +535,11 @@ void sm9_u64_mul_add(uint64_t *w0, uint64_t *w1,
if (r[0] < d) {
r[1]++;
}

*w0 = r[0];
*w1 = r[1];
}
#endif


// p = b640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457d
Expand All @@ -542,6 +550,7 @@ const uint64_t SM9_Z256_P_PRIME[4] = {
};


#ifndef ENABLE_SM9_Z256_ARMV8
// z = a*b
// c = (z + (z * p' mod 2^256) * p)/2^256
void sm9_z256_fp_mont_mul(uint64_t r[4], const uint64_t a[4], const uint64_t b[4])
Expand Down Expand Up @@ -572,33 +581,38 @@ void sm9_z256_fp_mont_mul(uint64_t r[4], const uint64_t a[4], const uint64_t b[4
(void)sm9_z256_sub(r, r, SM9_Z256_P);
}
}
#endif

// TODO: NEON/SVE/SVE2 implementation
#if 0
// Two-accumulator ("2way") variant of Montgomery multiplication over Fp.
// d accumulates the a[j]*b partial products and e accumulates the q*p partial
// products in two independent carry chains; the result is r = d - e, with p
// added back when the subtraction borrows.
// NOTE(review): this appears to rely on SM9_Z256_MODP_MU being p^-1 mod 2^64,
// so that the discarded low limbs of the two chains are equal -- confirm
// against the definition of SM9_Z256_MODP_MU, which is not visible here.
void sm9_z256_fp_mont_mul_2way(sm9_z256_t r, const sm9_z256_t a, const sm9_z256_t b)
{
sm9_z256_t d = {0}, e = {0};
// NOTE(review): p0 and p1 are declared but never used in this function.
uint64_t q, t0, t1, p0, p1, tmp;
// loop-invariant factor of q: MU * b[0]
uint64_t pre = SM9_Z256_MODP_MU * b[0];
int i, j;

for (j = 0; j < 4; j++) {
// q = MU * (a[j]*b[0] + d[0] - e[0]) mod 2^64
q = pre * a[j] + SM9_Z256_MODP_MU * (d[0]-e[0]);

// limb 0 of each chain: the low word goes to tmp and is discarded,
// only the carries t0/t1 are kept
sm9_u64_mul_add(&tmp, &t0, a[j], b[0], d[0], 0);
sm9_u64_mul_add(&tmp, &t1, q, SM9_Z256_P[0], e[0], 0);

// remaining limbs: results are stored one limb lower (index i-1),
// which shifts both accumulators down by 64 bits
for (i = 1; i < 4; i++) {
sm9_u64_mul_add(&d[i-1], &t0, a[j], b[i], t0, d[i]);
sm9_u64_mul_add(&e[i-1], &t1, q, SM9_Z256_P[i], t1, e[i]);
}
// the final carries become the top limbs
d[3] = t0;
e[3] = t1;
}

// r = d - e; a non-zero return from sm9_z256_sub triggers adding p back
if (sm9_z256_sub(r, d, e)) {
sm9_z256_add(r, r, SM9_Z256_P);
}
}
#endif

#ifndef ENABLE_SM9_Z256_ARMV8
void sm9_z256_fp_to_mont(sm9_z256_t r, const sm9_z256_t a)
{
sm9_z256_fp_mont_mul(r, a, SM9_Z256_MODP_2e512);
Expand All @@ -608,12 +622,15 @@ void sm9_z256_fp_from_mont(sm9_z256_t r, const sm9_z256_t a)
{
sm9_z256_fp_mont_mul(r, a, SM9_Z256_ONE);
}
#endif

// Montgomery squaring: r = a * a, computed by calling sm9_z256_fp_mont_mul(r, a, a).
// NOTE(review): the original "ASM" marker presumably flags this function as a
// candidate for a dedicated assembly implementation -- confirm.
void sm9_z256_fp_mont_sqr(sm9_z256_t r, const sm9_z256_t a)
{
sm9_z256_fp_mont_mul(r, a, a);
}

// TODO: rename the arguments to a_mont and r_mont
void sm9_z256_fp_pow(sm9_z256_t r, const sm9_z256_t a, const sm9_z256_t e)
{
sm9_z256_t t;
Expand All @@ -626,9 +643,9 @@ void sm9_z256_fp_pow(sm9_z256_t r, const sm9_z256_t a, const sm9_z256_t e)
for (i = 3; i >= 0; i--) {
w = e[i];
for (j = 0; j < 64; j++) {
sm9_z256_fp_sqr(t, t);
sm9_z256_fp_mont_sqr(t, t);
if (w & 0x8000000000000000) {
sm9_z256_fp_mul(t, t, a);
sm9_z256_fp_mont_mul(t, t, a);
}
w <<= 1;
}
Expand All @@ -637,12 +654,9 @@ void sm9_z256_fp_pow(sm9_z256_t r, const sm9_z256_t a, const sm9_z256_t e)
sm9_z256_copy(r, t);
}

// TODO: what is sm2_mont_inv?
// Inversion in Fp by exponentiation: r = a^(p-2).
//
// The exponent is the precomputed constant SM9_Z256_P_MINUS_TWO, so p - 2 is
// not recomputed on every call. sm9_z256_fp_pow is called exactly once: a
// second call after r has been written would read a modified input whenever
// r aliases a.
//
// NOTE(review): sm9_z256_fp_pow multiplies with sm9_z256_fp_mont_mul, so a and
// r are presumably in Montgomery form -- confirm against the callers.
void sm9_z256_fp_inv(sm9_z256_t r, const sm9_z256_t a)
{
	sm9_z256_fp_pow(r, a, SM9_Z256_P_MINUS_TWO);
}

int sm9_z256_fp_from_bytes(sm9_z256_t r, const uint8_t buf[32])
Expand Down Expand Up @@ -764,6 +778,10 @@ void sm9_z256_fp2_to_hex(const sm9_z256_fp2 a, char hex[129])
sm9_z256_fp_to_hex(a[0], hex + 65);
}

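// TODO:
// The fp2/fp4 functions could be parallelized at a coarse grain, or could call
// __sm9_z256_fp_add directly to reduce function entry/exit overhead.
// Should fp2 get its own unrolled functions, or should fp4 be unrolled directly
// and given an ARMv8 implementation?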

void sm9_z256_fp2_add(sm9_z256_fp2 r, const sm9_z256_fp2 a, const sm9_z256_fp2 b)
{
sm9_z256_fp_add(r[0], a[0], b[0]);
Expand Down

0 comments on commit 6de0e02

Please sign in to comment.