Skip to content
This repository has been archived by the owner on Jun 20, 2023. It is now read-only.

Commit

Permalink
add x86_64 target
Browse files Browse the repository at this point in the history
  • Loading branch information
ashWhiteHat committed Jan 27, 2022
1 parent a1f8f2e commit 919ed9d
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 137 deletions.
98 changes: 0 additions & 98 deletions src/bn256/assembly.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
macro_rules! assembly_field {
($field:ident, $modulus:ident, $inv:ident) => {
impl $field {
/// The additive identity: the element whose four limbs are all zero.
#[inline]
pub const fn zero() -> $field {
    $field([0; 4])
}

/// Returns one, the multiplicative identity.
///
/// The value returned is the constant `R`. `R` is not a macro parameter, so
/// it has to be in scope wherever this macro is expanded.
// NOTE(review): `R` is presumably the Montgomery form of 1 (2^256 mod the
// modulus) — confirm against the constant's definition.
#[inline]
pub const fn one() -> $field {
R
}

/// Doubles this field element.
#[inline]
pub fn double(&self) -> $field {
Expand Down Expand Up @@ -68,92 +56,6 @@ macro_rules! assembly_field {
$field([r0, r1, r2, r3])
}

/// Reduces a 512-bit integer, given as eight little-endian 64-bit limbs,
/// to a field element.
fn from_u512(limbs: [u64; 8]) -> $field {
    // The input is viewed as two 256-bit digits, `lo + hi * 2^256`, and each
    // digit is reduced separately:
    //
    // 1. the low digit is multiplied by R^2, as normal;
    // 2. the high digit is multiplied by R^2 * 2^256 = R^3;
    //
    // and the two results are then added in the field.
    //
    // Feeding an arbitrary 256-bit number into the reduction is safe: the
    // reduction works as long as the product is below R = 2^256 times the
    // modulus, and for any `c` smaller than the modulus, (2^256 - 1) * c
    // satisfies that bound. Here `c` is one of the constants `R2` or `R3`,
    // both of which lie in the field.
    let [l0, l1, l2, l3, h0, h1, h2, h3] = limbs;
    let lo = $field([l0, l1, l2, l3]);
    let hi = $field([h0, h1, h2, h3]);
    // Convert to Montgomery form
    lo * R2 + hi * R3
}

/// Converts from an integer represented in little endian
/// into its (congruent) field-element representation.
///
/// `val` holds four 64-bit limbs, least significant first. The body
/// multiplies `val` by the constant `R2` limb by limb, reduces the eight-limb
/// product with the modulus, and finishes with one conditional correction.
// NOTE(review): `mac(a, b, c, carry)` is presumably `a + b * c + carry`
// returned as (low limb, high limb), `adc` an add-with-carry and `sbb` a
// subtract-with-borrow — confirm against their definitions.
// NOTE(review): the reduction uses the constant `INV` by name rather than the
// macro's `$inv` parameter, so `INV` must be in scope at the expansion site.
pub const fn from_raw(val: [u64; 4]) -> $field {
// Step 1: 4x4 limb product `val * R2`, accumulated row by row into r0..r7.
let (r0, carry) = mac(0, val[0], R2.0[0], 0);
let (r1, carry) = mac(0, val[0], R2.0[1], carry);
let (r2, carry) = mac(0, val[0], R2.0[2], carry);
let (r3, r4) = mac(0, val[0], R2.0[3], carry);

let (r1, carry) = mac(r1, val[1], R2.0[0], 0);
let (r2, carry) = mac(r2, val[1], R2.0[1], carry);
let (r3, carry) = mac(r3, val[1], R2.0[2], carry);
let (r4, r5) = mac(r4, val[1], R2.0[3], carry);

let (r2, carry) = mac(r2, val[2], R2.0[0], 0);
let (r3, carry) = mac(r3, val[2], R2.0[1], carry);
let (r4, carry) = mac(r4, val[2], R2.0[2], carry);
let (r5, r6) = mac(r5, val[2], R2.0[3], carry);

let (r3, carry) = mac(r3, val[3], R2.0[0], 0);
let (r4, carry) = mac(r4, val[3], R2.0[1], carry);
let (r5, carry) = mac(r5, val[3], R2.0[2], carry);
let (r6, r7) = mac(r6, val[3], R2.0[3], carry);

// Step 2: four reduction rounds, one per low limb (r0, r1, r2, r3). Each
// round derives a factor `k` from the current lowest limb and `INV`, adds
// `k * modulus` starting at that limb, and discards the resulting low limb
// (bound to `_`). `carry2` carries the overflow of one round into the next.
let k = r0.wrapping_mul(INV);
let (_, carry) = mac(r0, k, $modulus.0[0], 0);
let (r1, carry) = mac(r1, k, $modulus.0[1], carry);
let (r2, carry) = mac(r2, k, $modulus.0[2], carry);
let (r3, carry) = mac(r3, k, $modulus.0[3], carry);
let (r4, carry2) = adc(r4, 0, carry);

let k = r1.wrapping_mul(INV);
let (_, carry) = mac(r1, k, $modulus.0[0], 0);
let (r2, carry) = mac(r2, k, $modulus.0[1], carry);
let (r3, carry) = mac(r3, k, $modulus.0[2], carry);
let (r4, carry) = mac(r4, k, $modulus.0[3], carry);
let (r5, carry2) = adc(r5, carry2, carry);

let k = r2.wrapping_mul(INV);
let (_, carry) = mac(r2, k, $modulus.0[0], 0);
let (r3, carry) = mac(r3, k, $modulus.0[1], carry);
let (r4, carry) = mac(r4, k, $modulus.0[2], carry);
let (r5, carry) = mac(r5, k, $modulus.0[3], carry);
let (r6, carry2) = adc(r6, carry2, carry);

let k = r3.wrapping_mul(INV);
let (_, carry) = mac(r3, k, $modulus.0[0], 0);
let (r4, carry) = mac(r4, k, $modulus.0[1], carry);
let (r5, carry) = mac(r5, k, $modulus.0[2], carry);
let (r6, carry) = mac(r6, k, $modulus.0[3], carry);
let (r7, _) = adc(r7, carry2, carry);

// Step 3: the candidate result is the upper half (r4..r7). Subtract the
// modulus from it once, tracking the borrow.
let (d0, borrow) = sbb(r4, $modulus.0[0], 0);
let (d1, borrow) = sbb(r5, $modulus.0[1], borrow);
let (d2, borrow) = sbb(r6, $modulus.0[2], borrow);
let (d3, borrow) = sbb(r7, $modulus.0[3], borrow);

// If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise
// borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus.
let (d0, carry) = adc(d0, $modulus.0[0] & borrow, 0);
let (d1, carry) = adc(d1, $modulus.0[1] & borrow, carry);
let (d2, carry) = adc(d2, $modulus.0[2] & borrow, carry);
let (d3, _) = adc(d3, $modulus.0[3] & borrow, carry);

$field([d0, d1, d2, d3])
}

/// Squares this element.
#[inline]
pub fn square(&self) -> $field {
Expand Down
40 changes: 20 additions & 20 deletions src/bn256/fq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use rand::RngCore;
use std::io::{self, Read, Write};
use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};

#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
use super::assembly::assembly_field;

#[derive(Clone, Copy, Eq)]
Expand Down Expand Up @@ -214,9 +214,6 @@ impl<'a, 'b> Mul<&'b Fq> for &'a Fq {
impl_binops_additive!(Fq, Fq);
impl_binops_multiplicative!(Fq, Fq);

#[cfg(feature = "asm")]
assembly_field!(Fq, MODULUS, INV);

impl Fq {
pub const fn size() -> usize {
32
Expand Down Expand Up @@ -254,10 +251,10 @@ impl Fq {
pub fn to_bytes(&self) -> [u8; 32] {
// Turn into canonical form by computing
// (a.R) / R = a
#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
let tmp = Fq::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);

#[cfg(not(feature = "asm"))]
#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
let tmp = Fq::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);

let mut res = [0; 32];
Expand Down Expand Up @@ -287,10 +284,7 @@ impl Fq {
LegendreSymbol::QuadraticNonResidue
}
}
}

#[cfg(not(feature = "asm"))]
impl Fq {
/// Returns zero, the additive identity.
#[inline]
pub const fn zero() -> Fq {
Expand All @@ -303,13 +297,6 @@ impl Fq {
R
}

/// Doubles this field element.
///
/// Returns the result of calling `add` with `self` as both operands.
#[inline]
pub const fn double(&self) -> Fq {
// TODO: This can be achieved more efficiently with a bitshift.
self.add(self)
}

fn from_u512(limbs: [u64; 8]) -> Fq {
// We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits
// with the higher bits multiplied by 2^256. Thus, we perform two reductions
Expand All @@ -335,6 +322,19 @@ impl Fq {
pub const fn from_raw(val: [u64; 4]) -> Self {
// Wrap the raw limbs unchanged in `Fq` and multiply by the constant `R2`.
// NOTE(review): presumably this multiplication is what brings `val` into
// Montgomery form — confirm against `mul`.
(&Fq(val)).mul(&R2)
}
}

#[cfg(all(feature = "asm", target_arch = "x86_64"))]
assembly_field!(Fq, MODULUS, INV);

#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
impl Fq {
/// Doubles this field element.
///
/// Returns the result of calling `add` with `self` as both operands.
#[inline]
pub const fn double(&self) -> Fq {
// TODO: This can be achieved more efficiently with a bitshift.
self.add(self)
}

/// Squares this element.
#[inline]
Expand Down Expand Up @@ -598,11 +598,11 @@ impl ff::PrimeField for Fq {
fn to_repr(&self) -> Self::Repr {
// Turn into canonical form by computing
// (a.R) / R = a
#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
let tmp =
Self::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);

#[cfg(not(feature = "asm"))]
#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
let tmp = Self::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);

let mut res = [0; 32];
Expand Down Expand Up @@ -690,10 +690,10 @@ impl FieldExt for Fq {
/// Gets the lower 128 bits of this field element when expressed
/// canonically.
fn get_lower_128(&self) -> u128 {
#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
let tmp = Fq::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);

#[cfg(not(feature = "asm"))]
#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
let tmp = Fq::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);

u128::from(tmp.0[0]) | (u128::from(tmp.0[1]) << 64)
Expand Down
36 changes: 18 additions & 18 deletions src/bn256/fr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption};

use crate::arithmetic::{adc, mac, sbb, BaseExt, FieldExt, Group};

#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
use super::assembly::assembly_field;

#[derive(Clone, Copy, Eq, Hash)]
Expand Down Expand Up @@ -222,17 +222,11 @@ impl<'a, 'b> Mul<&'b Fr> for &'a Fr {
impl_binops_additive!(Fr, Fr);
impl_binops_multiplicative!(Fr, Fr);

#[cfg(feature = "asm")]
assembly_field!(Fr, MODULUS, INV);

impl Fr {
/// Legendre symbol of this element. Not implemented for `Fr`.
///
/// # Panics
///
/// Always panics, because the body is `unimplemented!()`.
pub fn legendre(&self) -> LegendreSymbol {
unimplemented!()
}
}

#[cfg(not(feature = "asm"))]
impl Fr {
/// Returns zero, the additive identity.
#[inline]
pub const fn zero() -> Fr {
Expand All @@ -245,13 +239,6 @@ impl Fr {
R
}

/// Doubles this field element.
///
/// Returns the result of calling `add` with `self` as both operands.
#[inline]
pub const fn double(&self) -> Fr {
// TODO: This can be achieved more efficiently with a bitshift.
self.add(self)
}

fn from_u512(limbs: [u64; 8]) -> Fr {
// We reduce an arbitrary 512-bit number by decomposing it into two 256-bit digits
// with the higher bits multiplied by 2^256. Thus, we perform two reductions
Expand All @@ -277,6 +264,19 @@ impl Fr {
pub const fn from_raw(val: [u64; 4]) -> Self {
// Wrap the raw limbs unchanged in `Fr` and multiply by the constant `R2`.
// NOTE(review): presumably this multiplication is what brings `val` into
// Montgomery form — confirm against `mul`.
(&Fr(val)).mul(&R2)
}
}

#[cfg(all(feature = "asm", target_arch = "x86_64"))]
assembly_field!(Fr, MODULUS, INV);

#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
impl Fr {
/// Doubles this field element.
///
/// Returns the result of calling `add` with `self` as both operands.
#[inline]
pub const fn double(&self) -> Fr {
// TODO: This can be achieved more efficiently with a bitshift.
self.add(self)
}

/// Squares this element.
#[inline]
Expand Down Expand Up @@ -538,10 +538,10 @@ impl ff::PrimeField for Fr {
fn to_repr(&self) -> Self::Repr {
// Turn into canonical form by computing
// (a.R) / R = a
#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
let tmp = Fr::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);

#[cfg(not(feature = "asm"))]
#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
let tmp = Fr::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);

let mut res = [0; 32];
Expand Down Expand Up @@ -672,10 +672,10 @@ impl FieldExt for Fr {
/// Gets the lower 128 bits of this field element when expressed
/// canonically.
fn get_lower_128(&self) -> u128 {
#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
let tmp = Fr::montgomery_reduce(&[self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0]);

#[cfg(not(feature = "asm"))]
#[cfg(any(not(feature = "asm"), not(target_arch = "x86_64")))]
let tmp = Fr::montgomery_reduce(self.0[0], self.0[1], self.0[2], self.0[3], 0, 0, 0, 0);

u128::from(tmp.0[0]) | (u128::from(tmp.0[1]) << 64)
Expand Down
2 changes: 1 addition & 1 deletion src/bn256/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod fq6;
mod fr;
mod g;

#[cfg(feature = "asm")]
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
mod assembly;

pub use engine::*;
Expand Down

0 comments on commit 919ed9d

Please sign in to comment.