diff --git a/src/bn256/assembly.rs b/src/bn256/assembly.rs index 495b6b8..da23ae6 100644 --- a/src/bn256/assembly.rs +++ b/src/bn256/assembly.rs @@ -1,18 +1,6 @@ macro_rules! assembly_field { ($field:ident, $modulus:ident, $inv:ident) => { impl $field { - /// Returns zero, the additive identity. - #[inline] - pub const fn zero() -> $field { - $field([0, 0, 0, 0]) - } - - /// Returns one, the multiplicative identity. - #[inline] - pub const fn one() -> $field { - R - } - /// Doubles this field element. #[inline] pub fn double(&self) -> $field { @@ -68,92 +56,6 @@ macro_rules! assembly_field { $field([r0, r1, r2, r3]) } - fn from_u512(limbs: [u64; 8]) -> $field { - // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits - // with the higher bits multiplied by 2^256. Thus, we perform two reductions - // - // 1. the lower bits are multiplied by R^2, as normal - // 2. the upper bits are multiplied by R^2 * 2^256 = R^3 - // - // and computing their sum in the field. It remains to see that arbitrary 256-bit - // numbers can be placed into Montgomery form safely using the reduction. The - // reduction works so long as the product is less than R=2^256 multiplied by - // the modulus. This holds because for any `c` smaller than the modulus, we have - // that (2^256 - 1)*c is an acceptable product for the reduction. Therefore, the - // reduction always works so long as `c` is in the field; in this case it is either the - // constant `R2` or `R3`. - let d0 = $field([limbs[0], limbs[1], limbs[2], limbs[3]]); - let d1 = $field([limbs[4], limbs[5], limbs[6], limbs[7]]); - // Convert to Montgomery form - d0 * R2 + d1 * R3 - } - - /// Converts from an integer represented in little endian - /// into its (congruent) `Fr` representation. - pub const fn from_raw(val: [u64; 4]) -> $field { - let (r0, carry) = mac(0, val[0], R2.0[0], 0); - let (r1, carry) = mac(0, val[0], R2.0[1], carry); - let (r2, carry) = mac(0, val[0], R2.0[2], carry); - let (r3, r4) = mac(0, val[0], R2.0[3], carry); - - let (r1, carry) = mac(r1, val[1], R2.0[0], 0); - let (r2, carry) = mac(r2, val[1], R2.0[1], carry); - let (r3, carry) = mac(r3, val[1], R2.0[2], carry); - let (r4, r5) = mac(r4, val[1], R2.0[3], carry); - - let (r2, carry) = mac(r2, val[2], R2.0[0], 0); - let (r3, carry) = mac(r3, val[2], R2.0[1], carry); - let (r4, carry) = mac(r4, val[2], R2.0[2], carry); - let (r5, r6) = mac(r5, val[2], R2.0[3], carry); - - let (r3, carry) = mac(r3, val[3], R2.0[0], 0); - let (r4, carry) = mac(r4, val[3], R2.0[1], carry); - let (r5, carry) = mac(r5, val[3], R2.0[2], carry); - let (r6, r7) = mac(r6, val[3], R2.0[3], carry); - - let k = r0.wrapping_mul(INV); - let (_, carry) = mac(r0, k, $modulus.0[0], 0); - let (r1, carry) = mac(r1, k, $modulus.0[1], carry); - let (r2, carry) = mac(r2, k, $modulus.0[2], carry); - let (r3, carry) = mac(r3, k, $modulus.0[3], carry); - let (r4, carry2) = adc(r4, 0, carry); - - let k = r1.wrapping_mul(INV); - let (_, carry) = mac(r1, k, $modulus.0[0], 0); - let (r2, carry) = mac(r2, k, $modulus.0[1], carry); - let (r3, carry) = mac(r3, k, $modulus.0[2], carry); - let (r4, carry) = mac(r4, k, $modulus.0[3], carry); - let (r5, carry2) = adc(r5, carry2, carry); - - let k = r2.wrapping_mul(INV); - let (_, carry) = mac(r2, k, $modulus.0[0], 0); - let (r3, carry) = mac(r3, k, $modulus.0[1], carry); - let (r4, carry) = mac(r4, k, $modulus.0[2], carry); - let (r5, carry) = mac(r5, k, $modulus.0[3], carry); - let (r6, carry2) = adc(r6, carry2, carry); - - let k = r3.wrapping_mul(INV); - let (_, carry) = mac(r3, k, $modulus.0[0], 0); - let (r4, carry) = mac(r4, k, $modulus.0[1], carry); - let (r5, carry) = mac(r5, k, $modulus.0[2], carry); - let (r6, carry) = mac(r6, k, $modulus.0[3], carry); - let (r7, _) = adc(r7, carry2, carry); - - let (d0, borrow) = sbb(r4, $modulus.0[0], 0); - let (d1, borrow) = sbb(r5, $modulus.0[1], borrow); - let (d2, borrow) = sbb(r6, $modulus.0[2], borrow); - let (d3, borrow) = sbb(r7, $modulus.0[3], borrow); - - // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise - // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus. - let (d0, carry) = adc(d0, $modulus.0[0] & borrow, 0); - let (d1, carry) = adc(d1, $modulus.0[1] & borrow, carry); - let (d2, carry) = adc(d2, $modulus.0[2] & borrow, carry); - let (d3, _) = adc(d3, $modulus.0[3] & borrow, carry); - - $field([d0, d1, d2, d3]) - } - /// Squares this element. #[inline] pub fn square(&self) -> $field { diff --git a/src/bn256/fq.rs b/src/bn256/fq.rs index 4fd2ecf..4da19d4 100644 --- a/src/bn256/fq.rs +++ b/src/bn256/fq.rs @@ -7,7 +7,7 @@ use rand::RngCore; use std::io::{self, Read, Write}; use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption}; -#[cfg(feature = "asm")] +#[cfg(all(feature = "asm", target_arch = "x86_64"))] use super::assembly::assembly_field; #[derive(Clone, Copy, Eq)] @@ -214,9 +214,6 @@ impl<'a, 'b> Mul<&'b Fq> for &'a Fq { impl_binops_additive!(Fq, Fq); impl_binops_multiplicative!(Fq, Fq); -#[cfg(feature = "asm")] -assembly_field!(Fq, MODULUS, INV); - impl Fq { pub const fn size() -> usize { 32 @@ -254,10 +251,10 @@ impl Fq { pub fn to_bytes(&self) -> [u8; 32] { // Turn into canonical form by computing // (a.R) / R = a - #[cfg(feature = "asm")] + #[cfg(all(feature = "asm", target_arch = "x86_64"))] let tmp = Fq::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]); - #[cfg(not(feature = "asm"))] + #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] let tmp = Fq::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0); let mut res = [0; 32]; @@ -287,10 +284,7 @@ impl Fq { LegendreSymbol::QuadraticNonResidue } } -} -#[cfg(not(feature = "asm"))] -impl Fq { /// Returns zero, the additive identity. #[inline] pub const fn zero() -> Fq { @@ -303,13 +297,6 @@ impl Fq { R } - /// Doubles this field element. - #[inline] - pub const fn double(&self) -> Fq { - // TODO: This can be achieved more efficiently with a bitshift. - self.add(self) - } - fn from_u512(limbs: [u64; 8]) -> Fq { // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits // with the higher bits multiplied by 2^256. Thus, we perform two reductions @@ -335,6 +322,19 @@ impl Fq { pub const fn from_raw(val: [u64; 4]) -> Self { (&Fq(val)).mul(&R2) } +} + +#[cfg(all(feature = "asm", target_arch = "x86_64"))] +assembly_field!(Fq, MODULUS, INV); + +#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] +impl Fq { + /// Doubles this field element. + #[inline] + pub const fn double(&self) -> Fq { + // TODO: This can be achieved more efficiently with a bitshift. + self.add(self) + } /// Squares this element. #[inline] @@ -598,11 +598,11 @@ impl ff::PrimeField for Fq { fn to_repr(&self) -> Self::Repr { // Turn into canonical form by computing // (a.R) / R = a - #[cfg(feature = "asm")] + #[cfg(all(feature = "asm", target_arch = "x86_64"))] let tmp = Self::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]); - #[cfg(not(feature = "asm"))] + #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] let tmp = Self::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0); let mut res = [0; 32]; @@ -690,10 +690,10 @@ impl FieldExt for Fq { /// Gets the lower 128 bits of this field element when expressed /// canonically. fn get_lower_128(&self) -> u128 { - #[cfg(feature = "asm")] + #[cfg(all(feature = "asm", target_arch = "x86_64"))] let tmp = Fq::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]); - #[cfg(not(feature = "asm"))] + #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] let tmp = Fq::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0); u128::from(tmp.0[0]) | (u128::from(tmp.0[1]) << 64) diff --git a/src/bn256/fr.rs b/src/bn256/fr.rs index 9d10662..c67a42b 100644 --- a/src/bn256/fr.rs +++ b/src/bn256/fr.rs @@ -8,7 +8,7 @@ use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption}; use crate::arithmetic::{adc, mac, sbb, BaseExt, FieldExt, Group}; -#[cfg(feature = "asm")] +#[cfg(all(feature = "asm", target_arch = "x86_64"))] use super::assembly::assembly_field; #[derive(Clone, Copy, Eq, Hash)] @@ -222,17 +222,11 @@ impl<'a, 'b> Mul<&'b Fr> for &'a Fr { impl_binops_additive!(Fr, Fr); impl_binops_multiplicative!(Fr, Fr); -#[cfg(feature = "asm")] -assembly_field!(Fr, MODULUS, INV); - impl Fr { pub fn legendre(&self) -> LegendreSymbol { unimplemented!() } -} -#[cfg(not(feature = "asm"))] -impl Fr { /// Returns zero, the additive identity. #[inline] pub const fn zero() -> Fr { @@ -245,13 +239,6 @@ impl Fr { R } - /// Doubles this field element. - #[inline] - pub const fn double(&self) -> Fr { - // TODO: This can be achieved more efficiently with a bitshift. - self.add(self) - } - fn from_u512(limbs: [u64; 8]) -> Fr { // We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits // with the higher bits multiplied by 2^256. Thus, we perform two reductions @@ -277,6 +264,19 @@ impl Fr { pub const fn from_raw(val: [u64; 4]) -> Self { (&Fr(val)).mul(&R2) } +} + +#[cfg(all(feature = "asm", target_arch = "x86_64"))] +assembly_field!(Fr, MODULUS, INV); + +#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] +impl Fr { + /// Doubles this field element. + #[inline] + pub const fn double(&self) -> Fr { + // TODO: This can be achieved more efficiently with a bitshift. + self.add(self) + } /// Squares this element. #[inline] @@ -538,10 +538,10 @@ impl ff::PrimeField for Fr { fn to_repr(&self) -> Self::Repr { // Turn into canonical form by computing // (a.R) / R = a - #[cfg(feature = "asm")] + #[cfg(all(feature = "asm", target_arch = "x86_64"))] let tmp = Fr::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]); - #[cfg(not(feature = "asm"))] + #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] let tmp = Fr::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0); let mut res = [0; 32]; @@ -672,10 +672,10 @@ impl FieldExt for Fr { /// Gets the lower 128 bits of this field element when expressed /// canonically. fn get_lower_128(&self) -> u128 { - #[cfg(feature = "asm")] + #[cfg(all(feature = "asm", target_arch = "x86_64"))] let tmp = Fr::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]); - #[cfg(not(feature = "asm"))] + #[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))] let tmp = Fr::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0); u128::from(tmp.0[0]) | (u128::from(tmp.0[1]) << 64) diff --git a/src/bn256/mod.rs b/src/bn256/mod.rs index b31e18b..c6c9467 100644 --- a/src/bn256/mod.rs +++ b/src/bn256/mod.rs @@ -6,7 +6,7 @@ mod fq6; mod fr; mod g; -#[cfg(feature = "asm")] +#[cfg(all(feature = "asm", target_arch = "x86_64"))] mod assembly; pub use engine::*;