From ba59c8b482518a9f06e91708c46be3eeb228f1f6 Mon Sep 17 00:00:00 2001 From: xevisalle Date: Sat, 28 May 2022 00:44:46 +0200 Subject: [PATCH 1/3] Apply patch to improve G2 arithmetic and pairings --- src/fp.rs | 61 ++++++++++++++++++++++++++++ src/fp2.rs | 36 +++++++---------- src/fp6.rs | 114 ++++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 157 insertions(+), 54 deletions(-) diff --git a/src/fp.rs b/src/fp.rs index 1dbb9ace..7c9ac9a1 100644 --- a/src/fp.rs +++ b/src/fp.rs @@ -434,6 +434,67 @@ impl Fp { (&rhs.neg()).add(self) } + /// Returns `c = a.zip(b).fold(0, |acc, (a_i, b_i)| acc + a_i * b_i)`. + /// + /// Implements Algorithm 2 from Patrick Longa's + /// [ePrint 2022-367](https://eprint.iacr.org/2022/367) §3. + #[inline] + pub(crate) fn sum_of_products(a: [Fp; T], b: [Fp; T]) -> Fp { + // For a single `a x b` multiplication, operand scanning (schoolbook) takes each + // limb of `a` in turn, and multiplies it by all of the limbs of `b` to compute + // the result as a double-width intermediate representation, which is then fully + // reduced at the end. Here however we have pairs of multiplications (a_i, b_i), + // the results of which are summed. + // + // The intuition for this algorithm is two-fold: + // - We can interleave the operand scanning for each pair, by processing the jth + // limb of each `a_i` together. As these have the same offset within the overall + // operand scanning flow, their results can be summed directly. + // - We can interleave the multiplication and reduction steps, resulting in a + // single bitshift by the limb size after each iteration. This means we only + // need to store a single extra limb overall, instead of keeping around all the + // intermediate results and eventually having twice as many limbs. + + // Algorithm 2, line 2 + let (u0, u1, u2, u3, u4, u5) = + (0..6).fold((0, 0, 0, 0, 0, 0), |(u0, u1, u2, u3, u4, u5), j| { + // Algorithm 2, line 3 + // For each pair in the overall sum of products: + let (t0, t1, t2, t3, t4, t5, t6) = (0..T).fold( + (u0, u1, u2, u3, u4, u5, 0), + |(t0, t1, t2, t3, t4, t5, t6), i| { + // Compute digit_j x row and accumulate into `u`. + let (t0, carry) = mac(t0, a[i].0[j], b[i].0[0], 0); + let (t1, carry) = mac(t1, a[i].0[j], b[i].0[1], carry); + let (t2, carry) = mac(t2, a[i].0[j], b[i].0[2], carry); + let (t3, carry) = mac(t3, a[i].0[j], b[i].0[3], carry); + let (t4, carry) = mac(t4, a[i].0[j], b[i].0[4], carry); + let (t5, carry) = mac(t5, a[i].0[j], b[i].0[5], carry); + let (t6, _) = adc(t6, 0, carry); + + (t0, t1, t2, t3, t4, t5, t6) + }, + ); + + // Algorithm 2, lines 4-5 + // This is a single step of the usual Montgomery reduction process. + let k = t0.wrapping_mul(INV); + let (_, carry) = mac(t0, k, MODULUS[0], 0); + let (r1, carry) = mac(t1, k, MODULUS[1], carry); + let (r2, carry) = mac(t2, k, MODULUS[2], carry); + let (r3, carry) = mac(t3, k, MODULUS[3], carry); + let (r4, carry) = mac(t4, k, MODULUS[4], carry); + let (r5, carry) = mac(t5, k, MODULUS[5], carry); + let (r6, _) = adc(t6, 0, carry); + + (r1, r2, r3, r4, r5, r6) + }); + + // Because we represent F_p elements in non-redundant form, we need a final + // conditional subtraction to ensure the output is in range. + (&Fp([u0, u1, u2, u3, u4, u5])).subtract_p() + } + #[inline(always)] const fn montgomery_reduce( t0: u64, diff --git a/src/fp2.rs b/src/fp2.rs index 3c2fab96..ec31f1bc 100644 --- a/src/fp2.rs +++ b/src/fp2.rs @@ -284,31 +284,23 @@ impl Fp2 { } } - pub const fn mul(&self, rhs: &Fp2) -> Fp2 { - // Karatsuba multiplication: + pub fn mul(&self, rhs: &Fp2) -> Fp2 { + // F_{p^2} x F_{p^2} multiplication implemented with operand scanning (schoolbook) + // computes the result as: // - // v0 = a0 * b0 - // v1 = a1 * b1 - // c0 = v0 + \beta * v1 - // c1 = (a0 + a1) * (b0 + b1) - v0 - v1 + // a·b = (a_0 b_0 + a_1 b_1 β) + (a_0 b_1 + a_1 b_0)i // - // In BLS12-381's F_{p^2}, our \beta is -1 so we - // can modify this formula. (Also, since we always - // subtract v1, we can compute v1 = -a1 * b1.) + // In BLS12-381's F_{p^2}, our β is -1, so the resulting F_{p^2} element is: + // + // c_0 = a_0 b_0 - a_1 b_1 + // c_1 = a_0 b_1 + a_1 b_0 // - // v0 = a0 * b0 - // v1 = (-a1) * b1 - // c0 = v0 + v1 - // c1 = (a0 + a1) * (b0 + b1) - v0 + v1 - - let v0 = (&self.c0).mul(&rhs.c0); - let v1 = (&(&self.c1).neg()).mul(&rhs.c1); - let c0 = (&v0).add(&v1); - let c1 = (&(&self.c0).add(&self.c1)).mul(&(&rhs.c0).add(&rhs.c1)); - let c1 = (&c1).sub(&v0); - let c1 = (&c1).add(&v1); - - Fp2 { c0, c1 } + // Each of these is a "sum of products", which we can compute efficiently. + + Fp2 { + c0: Fp::sum_of_products([self.c0, -self.c1], [rhs.c0, rhs.c1]), + c1: Fp::sum_of_products([self.c0, self.c1], [rhs.c1, rhs.c0]), + } } pub const fn add(&self, rhs: &Fp2) -> Fp2 { diff --git a/src/fp6.rs b/src/fp6.rs index fc0ad8ae..adc57391 100644 --- a/src/fp6.rs +++ b/src/fp6.rs @@ -284,6 +284,87 @@ impl Fp6 { self.c0.is_zero() & self.c1.is_zero() & self.c2.is_zero() } + /// Returns `c = self * b`. + /// + /// Implements the full-tower interleaving strategy from + /// [ePrint 2022-376](https://eprint.iacr.org/2022/367). + #[inline] + fn mul_interleaved(&self, b: &Self) -> Self { + // The intuition for this algorithm is that we can look at F_p^6 as a direct + // extension of F_p^2, and express the overall operations down to the base field + // F_p instead of only over F_p^2. This enables us to interleave multiplications + // and reductions, ensuring that we don't require double-width intermediate + // representations (with around twice as many limbs as F_p elements). + + // We want to express the multiplication c = a x b, where a = (a_0, a_1, a_2) is + // an element of F_p^6, and a_i = (a_i,0, a_i,1) is an element of F_p^2. The fully + // expanded multiplication is given by (2022-376 §5): + // + // c_0,0 = a_0,0 b_0,0 - a_0,1 b_0,1 + a_1,0 b_2,0 - a_1,1 b_2,1 + a_2,0 b_1,0 - a_2,1 b_1,1 + // - a_1,0 b_2,1 - a_1,1 b_2,0 - a_2,0 b_1,1 - a_2,1 b_1,0. + // = a_0,0 b_0,0 - a_0,1 b_0,1 + a_1,0 (b_2,0 - b_2,1) - a_1,1 (b_2,0 + b_2,1) + // + a_2,0 (b_1,0 - b_1,1) - a_2,1 (b_1,0 + b_1,1). + // + // c_0,1 = a_0,0 b_0,1 + a_0,1 b_0,0 + a_1,0 b_2,1 + a_1,1 b_2,0 + a_2,0 b_1,1 + a_2,1 b_1,0 + // + a_1,0 b_2,0 - a_1,1 b_2,1 + a_2,0 b_1,0 - a_2,1 b_1,1. + // = a_0,0 b_0,1 + a_0,1 b_0,0 + a_1,0(b_2,0 + b_2,1) + a_1,1(b_2,0 - b_2,1) + // + a_2,0(b_1,0 + b_1,1) + a_2,1(b_1,0 - b_1,1). + // + // c_1,0 = a_0,0 b_1,0 - a_0,1 b_1,1 + a_1,0 b_0,0 - a_1,1 b_0,1 + a_2,0 b_2,0 - a_2,1 b_2,1 + // - a_2,0 b_2,1 - a_2,1 b_2,0. + // = a_0,0 b_1,0 - a_0,1 b_1,1 + a_1,0 b_0,0 - a_1,1 b_0,1 + a_2,0(b_2,0 - b_2,1) + // - a_2,1(b_2,0 + b_2,1). + // + // c_1,1 = a_0,0 b_1,1 + a_0,1 b_1,0 + a_1,0 b_0,1 + a_1,1 b_0,0 + a_2,0 b_2,1 + a_2,1 b_2,0 + // + a_2,0 b_2,0 - a_2,1 b_2,1 + // = a_0,0 b_1,1 + a_0,1 b_1,0 + a_1,0 b_0,1 + a_1,1 b_0,0 + a_2,0(b_2,0 + b_2,1) + // + a_2,1(b_2,0 - b_2,1). + // + // c_2,0 = a_0,0 b_2,0 - a_0,1 b_2,1 + a_1,0 b_1,0 - a_1,1 b_1,1 + a_2,0 b_0,0 - a_2,1 b_0,1. + // c_2,1 = a_0,0 b_2,1 + a_0,1 b_2,0 + a_1,0 b_1,1 + a_1,1 b_1,0 + a_2,0 b_0,1 + a_2,1 b_0,0. + // + // Each of these is a "sum of products", which we can compute efficiently. + + let a = self; + let b10_p_b11 = b.c1.c0 + b.c1.c1; + let b10_m_b11 = b.c1.c0 - b.c1.c1; + let b20_p_b21 = b.c2.c0 + b.c2.c1; + let b20_m_b21 = b.c2.c0 - b.c2.c1; + + Fp6 { + c0: Fp2 { + c0: Fp::sum_of_products( + [a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1], + [b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11], + ), + c1: Fp::sum_of_products( + [a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1], + [b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11], + ), + }, + c1: Fp2 { + c0: Fp::sum_of_products( + [a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1], + [b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21], + ), + c1: Fp::sum_of_products( + [a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1], + [b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21], + ), + }, + c2: Fp2 { + c0: Fp::sum_of_products( + [a.c0.c0, -a.c0.c1, a.c1.c0, -a.c1.c1, a.c2.c0, -a.c2.c1], + [b.c2.c0, b.c2.c1, b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1], + ), + c1: Fp::sum_of_products( + [a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1], + [b.c2.c1, b.c2.c0, b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0], + ), + }, + } + } + #[inline] pub fn square(&self) -> Self { let s0 = self.c0.square(); @@ -328,38 +409,7 @@ impl<'a, 'b> Mul<&'b Fp6> for &'a Fp6 { #[inline] fn mul(self, other: &'b Fp6) -> Self::Output { - let aa = self.c0 * other.c0; - let bb = self.c1 * other.c1; - let cc = self.c2 * other.c2; - - let t1 = other.c1 + other.c2; - let tmp = self.c1 + self.c2; - let t1 = t1 * tmp; - let t1 = t1 - bb; - let t1 = t1 - cc; - let t1 = t1.mul_by_nonresidue(); - let t1 = t1 + aa; - - let t3 = other.c0 + other.c2; - let tmp = self.c0 + self.c2; - let t3 = t3 * tmp; - let t3 = t3 - aa; - let t3 = t3 + bb; - let t3 = t3 - cc; - - let t2 = other.c0 + other.c1; - let tmp = self.c0 + self.c1; - let t2 = t2 * tmp; - let t2 = t2 - aa; - let t2 = t2 - bb; - let cc = cc.mul_by_nonresidue(); - let t2 = t2 + cc; - - Fp6 { - c0: t1, - c1: t2, - c2: t3, - } + self.mul_interleaved(other) } } From 5ea3bda3db87bed11478efb05c661f682873bf54 Mon Sep 17 00:00:00 2001 From: xevisalle Date: Sat, 28 May 2022 00:56:28 +0200 Subject: [PATCH 2/3] Add patch to have fast subgroup check for is_torsion_free --- src/g1.rs | 51 +++++++++++++++++++++++++++++++++++++++++++-------- src/g2.rs | 15 ++++++--------- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/src/g1.rs b/src/g1.rs index 2d8a05aa..2fa73b9d 100644 --- a/src/g1.rs +++ b/src/g1.rs @@ -439,15 +439,14 @@ impl G1Affine { /// exists within the $q$-order subgroup $\mathbb{G}_1$. This should always return true /// unless an "unchecked" API was used. pub fn is_torsion_free(&self) -> Choice { - const FQ_MODULUS_BYTES: [u8; 32] = [ - 1, 0, 0, 0, 255, 255, 255, 255, 254, 91, 254, 255, 2, 164, 189, 83, 5, 216, 161, 9, 8, - 216, 57, 51, 72, 125, 157, 41, 83, 167, 237, 115, - ]; + // Algorithm from Section 6 of https://eprint.iacr.org/2021/1130 + // Updated proof of correctness in https://eprint.iacr.org/2022/352 + // + // Check that endomorphism_p(P) == -[x^2] P - // Clear the r-torsion from the point and check if it is the identity - G1Projective::from(*self) - .multiply(&FQ_MODULUS_BYTES) - .is_identity() + let minus_x_squared_times_p = G1Projective::from(self).mul_by_x().mul_by_x().neg(); + let endomorphism_p = endomorphism(self); + minus_x_squared_times_p.ct_eq(&G1Projective::from(endomorphism_p)) } /// Returns true if this point is on the curve. This should always return @@ -458,6 +457,25 @@ impl G1Affine { } } +/// A nontrivial third root of unity in Fp +pub const BETA: Fp = Fp::from_raw_unchecked([ + 0x30f1_361b_798a_64e8, + 0xf3b8_ddab_7ece_5a2a, + 0x16a8_ca3a_c615_77f7, + 0xc26a_2ff8_74fd_029b, + 0x3636_b766_6070_1c6e, + 0x051b_a4ab_241b_6160, +]); + +fn endomorphism(p: &G1Affine) -> G1Affine { + // Endomorphism of the points on the curve. + // endomorphism_p(x,y) = (BETA * x, y) + // where BETA is a non-trivial cubic root of unity in Fq. + let mut res = p.clone(); + res.x *= BETA; + res +} + /// This is an element of $\mathbb{G}_1$ represented in the projective coordinate space. #[derive(Copy, Clone, Debug)] #[cfg_attr(feature = "canon", derive(Canon))] @@ -896,6 +914,23 @@ impl G1Projective { mod tests { use super::*; + #[test] + fn test_beta() { + assert_eq!( + BETA, + Fp::from_bytes(&[ + 0x00u8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x19, 0x67, 0x2f, 0xdf, 0x76, + 0xce, 0x51, 0xba, 0x69, 0xc6, 0x07, 0x6a, 0x0f, 0x77, 0xea, 0xdd, 0xb3, 0xa9, 0x3b, + 0xe6, 0xf8, 0x96, 0x88, 0xde, 0x17, 0xd8, 0x13, 0x62, 0x0a, 0x00, 0x02, 0x2e, 0x01, + 0xff, 0xff, 0xff, 0xfe, 0xff, 0xfe + ]) + .unwrap() + ); + assert_ne!(BETA, Fp::one()); + assert_ne!(BETA * BETA, Fp::one()); + assert_eq!(BETA * BETA * BETA, Fp::one()); + } + #[test] fn test_is_on_curve() { assert!(bool::from(G1Affine::identity().is_on_curve())); diff --git a/src/g2.rs b/src/g2.rs index 0c88b107..df4bd037 100644 --- a/src/g2.rs +++ b/src/g2.rs @@ -500,15 +500,12 @@ impl G2Affine { /// exists within the $q$-order subgroup $\mathbb{G}_2$. This should always return true /// unless an "unchecked" API was used. pub fn is_torsion_free(&self) -> Choice { - const FQ_MODULUS_BYTES: [u8; 32] = [ - 1, 0, 0, 0, 255, 255, 255, 255, 254, 91, 254, 255, 2, 164, 189, 83, 5, 216, 161, 9, 8, - 216, 57, 51, 72, 125, 157, 41, 83, 167, 237, 115, - ]; - - // Clear the r-torsion from the point and check if it is the identity - G2Projective::from(*self) - .multiply(&FQ_MODULUS_BYTES) - .is_identity() + // Algorithm from Section 4 of https://eprint.iacr.org/2021/1130 + // Updated proof of correctness in https://eprint.iacr.org/2022/352 + // + // Check that psi(P) == [x] P + let p = G2Projective::from(self); + p.psi().ct_eq(&p.mul_by_x()) } /// Returns true if this point is on the curve. This should always return From f05bc09601563a14f428414761ac5c5686bfae29 Mon Sep 17 00:00:00 2001 From: xevisalle Date: Sat, 28 May 2022 01:20:17 +0200 Subject: [PATCH 3/3] Update CHANGELOG.md and fix fmt --- CHANGELOG.md | 3 +++ src/g1.rs | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed6fca22..3fe7bb88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Apply patches from `zkcrypto` to improve the efficiency [#86](https://github.com/dusk-network/bls12_381/issues/86) + ## [0.10.0] - 2022-05-25 ### Changed diff --git a/src/g1.rs b/src/g1.rs index 2fa73b9d..4c8b34ae 100644 --- a/src/g1.rs +++ b/src/g1.rs @@ -919,10 +919,10 @@ mod tests { assert_eq!( BETA, Fp::from_bytes(&[ - 0x00u8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x19, 0x67, 0x2f, 0xdf, 0x76, - 0xce, 0x51, 0xba, 0x69, 0xc6, 0x07, 0x6a, 0x0f, 0x77, 0xea, 0xdd, 0xb3, 0xa9, 0x3b, - 0xe6, 0xf8, 0x96, 0x88, 0xde, 0x17, 0xd8, 0x13, 0x62, 0x0a, 0x00, 0x02, 0x2e, 0x01, - 0xff, 0xff, 0xff, 0xfe, 0xff, 0xfe + 0x00u8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x19, 0x67, 0x2f, 0xdf, + 0x76, 0xce, 0x51, 0xba, 0x69, 0xc6, 0x07, 0x6a, 0x0f, 0x77, 0xea, 0xdd, 0xb3, 0xa9, + 0x3b, 0xe6, 0xf8, 0x96, 0x88, 0xde, 0x17, 0xd8, 0x13, 0x62, 0x0a, 0x00, 0x02, 0x2e, + 0x01, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xfe ]) .unwrap() );