From c383a1d452d538bb47183b548c3c826bd77ade63 Mon Sep 17 00:00:00 2001 From: Jia Liu Date: Thu, 13 Jul 2023 11:27:42 +0100 Subject: [PATCH 1/3] fix bug in div_unsafe --- maingate/src/instructions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maingate/src/instructions.rs b/maingate/src/instructions.rs index fd501478..514b3d41 100644 --- a/maingate/src/instructions.rs +++ b/maingate/src/instructions.rs @@ -400,7 +400,7 @@ pub trait MainGateInstructions: Chip { [ Term::assigned_to_mul(b), Term::unassigned_to_mul(c), - Term::assigned_to_add(a), + Term::assigned_to_sub(a), ], F::ZERO, CombinationOptionCommon::OneLinerMul.into(), From 7357aefde4a60264a139b1916d0988d2bffc412f Mon Sep 17 00:00:00 2001 From: Jia Liu Date: Mon, 21 Aug 2023 17:17:29 +0100 Subject: [PATCH 2/3] windowed scalar mul with aux for base field and general chip --- ecc/Cargo.toml | 3 +- ecc/src/base_field_ecc.rs | 108 +++++++++++++------ ecc/src/base_field_ecc/mul.rs | 164 ++++++++++++++++------------- ecc/src/general_ecc.rs | 102 ++++++++++++------ ecc/src/general_ecc/mul.rs | 189 +++++++++++++++++++--------------- ecdsa/src/ecdsa.rs | 2 +- 6 files changed, 349 insertions(+), 219 deletions(-) diff --git a/ecc/Cargo.toml b/ecc/Cargo.toml index 6aa13e59..615c673a 100644 --- a/ecc/Cargo.toml +++ b/ecc/Cargo.toml @@ -15,7 +15,8 @@ subtle = { version = "2.3", default-features = false } [dev-dependencies] rand_core = { version = "0.6", default-features = false } paste = "1.0.7" +rand_chacha = "0.3.1" [features] default = [] -circuit-params = ["integer/circuit-params"] +circuit-params = ["integer/circuit-params"] \ No newline at end of file diff --git a/ecc/src/base_field_ecc.rs b/ecc/src/base_field_ecc.rs index c38e8855..92f30cbd 100644 --- a/ecc/src/base_field_ecc.rs +++ b/ecc/src/base_field_ecc.rs @@ -1,4 +1,6 @@ -use super::{make_mul_aux, AssignedPoint, EccConfig, MulAux, Point}; +use super::{AssignedPoint, EccConfig, MulAux, Point}; +use crate::halo2::arithmetic::Field; +use 
crate::halo2::halo2curves::ff::PrimeField; use crate::integer::chip::IntegerChip; use crate::integer::rns::{Integer, Rns}; use crate::{halo2, maingate}; @@ -29,10 +31,14 @@ pub struct BaseFieldEccChip, Value, )>, - /// Auxiliary points for optimized multiplication for each (window_size, - /// n_pairs) pairs - aux_registry: - BTreeMap<(usize, usize), AssignedPoint>, + /// Auxiliary points for optimized multiplication for each window_size + aux_registry: BTreeMap< + usize, + ( + C::Scalar, + AssignedPoint, + ), + >, } impl @@ -84,22 +90,26 @@ impl } /// Auxilary point for optimized multiplication algorithm - fn get_mul_aux( + fn get_mul_correction( &self, window_size: usize, - number_of_pairs: usize, - ) -> Result, Error> { + ) -> Result< + ( + C::Scalar, + MulAux, + ), + Error, + > { let to_add = match self.aux_generator.clone() { Some((assigned, _)) => Ok(assigned), None => Err(Error::Synthesis), }?; - let to_sub = match self.aux_registry.get(&(window_size, number_of_pairs)) { + let (scalar_correction, to_sub) = match self.aux_registry.get(&window_size) { Some(aux) => Ok(aux.clone()), None => Err(Error::Synthesis), }?; - // to_add the equivalent of AuxInit and to_sub AuxFin - // see https://hackmd.io/ncuKqRXzR-Cw-Au2fGzsMg?view - Ok(MulAux::new(to_add, to_sub)) + + Ok((scalar_correction, MulAux::new(to_add, to_sub))) } } @@ -175,20 +185,22 @@ impl Ok(()) } - /// Assigns multiplication auxiliary point for a pair of (window_size, - /// n_pairs) - pub fn assign_aux( + /// Assigns scalar correction and multiplication auxiliary point for window_size + pub fn assign_correction( &mut self, ctx: &mut RegionCtx<'_, C::Scalar>, window_size: usize, - number_of_pairs: usize, ) -> Result<(), Error> { - match self.aux_generator { - Some((_, point)) => { - let aux = point.map(|point| make_mul_aux(point, window_size, number_of_pairs)); - let aux = self.assign_point(ctx, aux)?; + let scalar_correction = self.correct_scalar(window_size); + + match &self.aux_generator { + 
Some((point, _)) => { + // compute correction point -2^w aux + let mut point_correction = self.double_n(ctx, point, window_size)?; + point_correction = self.neg(ctx, &point_correction)?; + self.aux_registry - .insert((window_size, number_of_pairs), aux); + .insert(window_size, (scalar_correction, point_correction)); Ok(()) } // aux generator is not assigned yet @@ -196,6 +208,25 @@ impl } } + /// correct scalar before mul; correction value is -2(1 + 2^w + ... + 2^{w(n-1)}) + fn correct_scalar(&mut self, window_size: usize) -> C::Scalar { + let window: usize = 1 << window_size; + let window_scalar = C::Scalar::from(window as u64); + + let num_bits = C::Scalar::NUM_BITS as usize; + let number_of_windows = (num_bits + window_size - 1) / window_size; + + let mut correction = C::Scalar::ONE; + let mut power = window_scalar; + for _ in 0..number_of_windows - 1 { + correction += power; + power *= window_scalar; + } + correction += correction; + + -correction + } + /// Constraints to ensure `AssignedPoint` is on curve pub fn assert_is_on_curve( &self, @@ -351,6 +382,7 @@ mod tests { use crate::integer::rns::Rns; use crate::integer::NUMBER_OF_LOOKUP_LIMBS; use crate::maingate; + use crate::maingate::DimensionMeasurement; use halo2::arithmetic::CurveAffine; use halo2::circuit::{Layouter, SimpleFloorPlanner, Value}; use halo2::halo2curves::{ @@ -365,7 +397,8 @@ mod tests { RangeInstructions, }; use paste::paste; - use rand_core::OsRng; + use rand_chacha::ChaCha20Rng; + use rand_core::{OsRng, SeedableRng}; const NUMBER_OF_LIMBS: usize = 4; const BIT_LEN_LIMB: usize = 68; @@ -659,8 +692,7 @@ mod tests { let offset = 0; let ctx = &mut RegionCtx::new(region, offset); ecc_chip.assign_aux_generator(ctx, Value::known(self.aux_generator))?; - ecc_chip.assign_aux(ctx, self.window_size, 1)?; - ecc_chip.get_mul_aux(self.window_size, 1)?; + ecc_chip.assign_correction(ctx, self.window_size)?; Ok(()) }, )?; @@ -671,8 +703,11 @@ mod tests { let offset = 0; let ctx = &mut 
RegionCtx::new(region, offset); - let base = C::CurveExt::random(OsRng); - let s = C::Scalar::random(OsRng); + // let mut rng = ChaCha20Rng::seed_from_u64(80); + let mut rng = OsRng; + + let base = C::CurveExt::random(&mut rng); + let s = C::Scalar::random(&mut rng); let result = base * s; let base = ecc_chip.assign_point(ctx, Value::known(base.into()))?; @@ -698,8 +733,11 @@ mod tests { where C::Scalar: FromUniformBytes<64>, { - for window_size in 1..5 { - let aux_generator = ::CurveExt::random(OsRng).to_affine(); + //let mut rng = ChaCha20Rng::seed_from_u64(42); + let mut rng = OsRng; + + for window_size in 2..5 { + let aux_generator = ::CurveExt::random(&mut rng).to_affine(); let circuit = TestEccMul { aux_generator, @@ -707,6 +745,11 @@ mod tests { }; let instance = vec![vec![]]; mock_prover_verify(&circuit, instance); + let dimension = DimensionMeasurement::measure(&circuit).unwrap(); + println!( + "window_size = {:?}, dimention: {:?}", + window_size, dimension + ); } } run::(); @@ -752,8 +795,7 @@ mod tests { let offset = 0; let ctx = &mut RegionCtx::new(region, offset); ecc_chip.assign_aux_generator(ctx, Value::known(self.aux_generator))?; - ecc_chip.assign_aux(ctx, self.window_size, self.number_of_pairs)?; - ecc_chip.get_mul_aux(self.window_size, self.number_of_pairs)?; + ecc_chip.assign_correction(ctx, self.window_size)?; Ok(()) }, )?; @@ -799,8 +841,8 @@ mod tests { paste! 
{ #[test] fn []() { - for number_of_pairs in 5..7 { - for window_size in 1..3 { + for number_of_pairs in 2..7 { + for window_size in 2..4 { let aux_generator = <$C as CurveAffine>::CurveExt::random(OsRng).to_affine(); let circuit = TestEccBatchMul { @@ -810,6 +852,8 @@ mod tests { }; let instance = vec![vec![]]; mock_prover_verify(&circuit, instance); + let dimension = DimensionMeasurement::measure(&circuit).unwrap(); + println!("(number of pairs, window_size) = ({:?}, {:?}), dimention: {:?}", number_of_pairs, window_size, dimension); } } } diff --git a/ecc/src/base_field_ecc/mul.rs b/ecc/src/base_field_ecc/mul.rs index 6c9ccf9f..7fab2523 100644 --- a/ecc/src/base_field_ecc/mul.rs +++ b/ecc/src/base_field_ecc/mul.rs @@ -9,56 +9,41 @@ use integer::maingate::RegionCtx; impl BaseFieldEccChip { - /// Pads scalar up to the next window_size mul - fn pad( - &self, - ctx: &mut RegionCtx<'_, C::Scalar>, - bits: &mut Vec>, - window_size: usize, - ) -> Result<(), Error> { - assert_eq!(bits.len(), C::Scalar::NUM_BITS as usize); - - // TODO: This is a tmp workaround. Instead of padding with zeros we can use a - // shorter ending window. 
- let padding_offset = (window_size - (bits.len() % window_size)) % window_size; - let zeros: Vec> = (0..padding_offset) - .map(|_| self.main_gate().assign_constant(ctx, C::Scalar::ZERO)) - .collect::>()?; - bits.extend(zeros); - bits.reverse(); - - Ok(()) - } - /// Splits the bit representation of a scalar into windows fn window(bits: Vec>, window_size: usize) -> Windowed { - assert_eq!(bits.len() % window_size, 0); - let number_of_windows = bits.len() / window_size; - Windowed( - (0..number_of_windows) - .map(|i| { - let mut selector: Vec> = (0..window_size) - .map(|j| bits[i * window_size + j].clone()) - .collect(); - selector.reverse(); - Selector(selector) - }) - .collect(), - ) + let last = bits.len() % window_size; + let num = bits.len() / window_size; + + let mut windows: Vec<_> = (0..num) + .map(|i| { + let k = i * window_size; + Selector(bits[k..k + window_size].to_vec()) + }) + .collect(); + + if last != 0 { + let last_start = bits.len() - last; + windows.push(Selector(bits[last_start..].to_vec())); + } + + windows.reverse(); + + Windowed(windows) } /// Constructs table for efficient multiplication algorithm /// The table contains precomputed point values that allow to trade /// additions for selections + /// [2]P, [3]P, ..., [2^w + 1]P fn make_incremental_table( &self, ctx: &mut RegionCtx<'_, C::Scalar>, - aux: &AssignedPoint, point: &AssignedPoint, window_size: usize, ) -> Result, Error> { let table_size = 1 << window_size; - let mut table = vec![aux.clone()]; + let double = self.double(ctx, point)?; + let mut table = vec![double]; for i in 0..(table_size - 1) { table.push(self.add(ctx, &table[i], point)?); } @@ -96,28 +81,48 @@ impl scalar: &AssignedValue, window_size: usize, ) -> Result, Error> { - assert!(window_size > 0); - let aux = self.get_mul_aux(window_size, 1)?; + assert!(window_size > 1); + let num_bits = C::Scalar::NUM_BITS as usize; + let number_of_windows = (num_bits + window_size - 1) / window_size; + let mut last = num_bits % 
window_size; + if last == 0 { + last = window_size; + } + let window_last: usize = 1 << last; + let (scalar_correction, aux) = self.get_mul_correction(window_size)?; let main_gate = self.main_gate(); - let decomposed = &mut main_gate.to_bits(ctx, scalar, C::Scalar::NUM_BITS as usize)?; + let scalar_adjusted = &main_gate.add_constant(ctx, scalar, scalar_correction)?; - self.pad(ctx, decomposed, window_size)?; - let windowed = Self::window(decomposed.to_vec(), window_size); - let table = &self.make_incremental_table(ctx, &aux.to_add, point, window_size)?; + let decomposed = main_gate.to_bits(ctx, &scalar_adjusted, num_bits)?; + let windowed = Self::window(decomposed, window_size); - let mut acc = self.select_multi(ctx, &windowed.0[0], table)?; - acc = self.double_n(ctx, &acc, window_size)?; + let table = &self.make_incremental_table(ctx, point, window_size)?; + let last_table = &Table(table.0[0..window_last].to_vec()); - let to_add = self.select_multi(ctx, &windowed.0[1], table)?; - acc = self.add(ctx, &acc, &to_add)?; + let mut acc = self.select_multi(ctx, &windowed.0[0], last_table)?; + acc = self.double_n(ctx, &acc, window_size)?; + let q = self.select_multi(ctx, &windowed.0[1], table)?; + acc = self._add_incomplete_unsafe(ctx, &acc, &q)?; - for selector in windowed.0.iter().skip(2) { + for i in 2..number_of_windows - 2 { acc = self.double_n(ctx, &acc, window_size - 1)?; - let to_add = self.select_multi(ctx, selector, table)?; - acc = self.ladder(ctx, &acc, &to_add)?; + let q = self.select_multi(ctx, &windowed.0[i], table)?; + acc = self._ladder_incomplete(ctx, &acc, &q)?; } + // The last two rows use auxiliary generator + // aux_1 = (2^w aux_2 + aux_generator) + Q_1 + // aux_0 = 2^w aux_1 + Q_0 - 2^w aux_generator + acc = self.double_n(ctx, &acc, window_size)?; + acc = self.add(ctx, &acc, &aux.to_add)?; + let q1 = self.select_multi(ctx, &windowed.0[number_of_windows - 2], table)?; + acc = self.add(ctx, &acc, &q1)?; + + acc = self.double_n(ctx, &acc, 
window_size)?; + let q0 = self.select_multi(ctx, &windowed.0[number_of_windows - 1], table)?; + acc = self.add(ctx, &acc, &q0)?; + self.add(ctx, &acc, &aux.to_sub) } @@ -137,59 +142,74 @@ impl )>, window_size: usize, ) -> Result, Error> { - assert!(window_size > 0); + assert!(window_size > 1); assert!(!pairs.is_empty()); - let aux = self.get_mul_aux(window_size, pairs.len())?; + + let num_bits = C::Scalar::NUM_BITS as usize; + let mut last = num_bits % window_size; + if last == 0 { + last = window_size; + } + let window_last: usize = 1 << last; let main_gate = self.main_gate(); - let mut decomposed_scalars: Vec>> = pairs + let (scalar_correction, aux) = self.get_mul_correction(window_size)?; + let decomposed_scalars: Vec>> = pairs .iter() - .map(|(_, scalar)| main_gate.to_bits(ctx, scalar, C::Scalar::NUM_BITS as usize)) + .map(|(_, scalar)| { + let scalar_adjusted = main_gate.add_constant(ctx, scalar, scalar_correction)?; + main_gate.to_bits(ctx, &scalar_adjusted, C::Scalar::NUM_BITS as usize) + }) .collect::>()?; - for decomposed in decomposed_scalars.iter_mut() { - self.pad(ctx, decomposed, window_size)?; - } - let windowed_scalars: Vec> = decomposed_scalars .iter() .map(|decomposed| Self::window(decomposed.to_vec(), window_size)) .collect(); let number_of_windows = windowed_scalars[0].0.len(); - let mut binary_aux = aux.to_add.clone(); let tables: Vec> = pairs .iter() - .enumerate() - .map(|(i, (point, _))| { - let table = self.make_incremental_table(ctx, &binary_aux, point, window_size); - if i != pairs.len() - 1 { - binary_aux = self.double(ctx, &binary_aux)?; - } - table - }) + .map(|(point, _)| self.make_incremental_table(ctx, point, window_size)) .collect::>()?; // preparation for the first round // initialize accumulator - let mut acc = self.select_multi(ctx, &windowed_scalars[0].0[0], &tables[0])?; + let last_table = &Table(tables[0].0[0..window_last].to_vec()); + let mut acc = self.select_multi(ctx, &windowed_scalars[0].0[0], last_table)?; // add first 
contributions other point scalar for (table, windowed) in tables.iter().skip(1).zip(windowed_scalars.iter().skip(1)) { + let last_table = &Table(table.0[0..window_last].to_vec()); let selector = &windowed.0[0]; - let to_add = self.select_multi(ctx, selector, table)?; - acc = self.add(ctx, &acc, &to_add)?; + let q = self.select_multi(ctx, selector, last_table)?; + acc = self.add(ctx, &acc, &q)?; } - for i in 1..number_of_windows { + for i in 1..number_of_windows - 2 { acc = self.double_n(ctx, &acc, window_size)?; for (table, windowed) in tables.iter().zip(windowed_scalars.iter()) { let selector = &windowed.0[i]; - let to_add = self.select_multi(ctx, selector, table)?; - acc = self.add(ctx, &acc, &to_add)?; + let q = self.select_multi(ctx, selector, table)?; + acc = self.add(ctx, &acc, &q)?; } } + acc = self.double_n(ctx, &acc, window_size)?; + acc = self.add(ctx, &acc, &aux.to_add)?; + for (table, windowed) in tables.iter().zip(windowed_scalars.iter()) { + let selector = &windowed.0[number_of_windows - 2]; + let q = self.select_multi(ctx, selector, table)?; + acc = self.add(ctx, &acc, &q)?; + } + + acc = self.double_n(ctx, &acc, window_size)?; + for (table, windowed) in tables.iter().zip(windowed_scalars.iter()) { + let selector = &windowed.0[number_of_windows - 1]; + let q = self.select_multi(ctx, selector, table)?; + acc = self.add(ctx, &acc, &q)?; + } + self.add(ctx, &acc, &aux.to_sub) } } diff --git a/ecc/src/general_ecc.rs b/ecc/src/general_ecc.rs index 89fd5ee6..0845814e 100644 --- a/ecc/src/general_ecc.rs +++ b/ecc/src/general_ecc.rs @@ -1,5 +1,6 @@ -use super::{make_mul_aux, AssignedPoint, EccConfig, MulAux, Point}; +use super::{AssignedPoint, EccConfig, MulAux, Point}; use crate::halo2; +use crate::halo2::arithmetic::Field; use crate::integer::rns::{Integer, Rns}; use crate::integer::{IntegerChip, IntegerInstructions, Range, UnassignedInteger}; use crate::maingate; @@ -36,8 +37,13 @@ pub struct GeneralEccChip< )>, /// Auxiliary points for optimized 
multiplication for each (window_size, /// n_pairs) pairs - aux_registry: - BTreeMap<(usize, usize), AssignedPoint>, + aux_registry: BTreeMap< + usize, + ( + Integer, + AssignedPoint, + ), + >, } impl< @@ -137,23 +143,27 @@ impl< } /// Auxilary point for optimized multiplication algorithm - fn get_mul_aux( + fn get_mul_correction( &self, window_size: usize, - number_of_pairs: usize, - ) -> Result, Error> { + ) -> Result< + ( + Integer, + MulAux, + ), + Error, + > { // Gets chips' aux generator let to_add = match self.aux_generator.clone() { Some((assigned, _)) => Ok(assigned), None => Err(Error::Synthesis), }?; - let to_sub = match self.aux_registry.get(&(window_size, number_of_pairs)) { + let (scalar_correction, to_sub) = match self.aux_registry.get(&window_size) { Some(aux) => Ok(aux.clone()), None => Err(Error::Synthesis), }?; - // to_add the equivalent of AuxInit and to_sub AuxFin - // see https://hackmd.io/ncuKqRXzR-Cw-Au2fGzsMg?view - Ok(MulAux::new(to_add, to_sub)) + + Ok((scalar_correction, MulAux::new(to_add, to_sub))) } } @@ -235,20 +245,22 @@ impl< Ok(()) } - /// Assigns multiplication auxiliary point for a pair of (window_size, - /// n_pairs) - pub fn assign_aux( + /// Assigns multiplication auxiliary point for window_size + pub fn assign_correction( &mut self, ctx: &mut RegionCtx<'_, N>, window_size: usize, - number_of_pairs: usize, ) -> Result<(), Error> { - match self.aux_generator { - Some((_, point)) => { - let aux = point.map(|point| make_mul_aux(point, window_size, number_of_pairs)); - let aux = self.assign_point(ctx, aux)?; + let scalar_correction = self.correct_scalar(window_size); + + match &self.aux_generator { + Some((point, _)) => { + // compute correction point -2^w aux + let mut point_correction = self.double_n(ctx, point, window_size)?; + point_correction = self.neg(ctx, &point_correction)?; + self.aux_registry - .insert((window_size, number_of_pairs), aux); + .insert(window_size, (scalar_correction, point_correction)); Ok(()) } // aux 
generator is not assigned yet @@ -256,6 +268,28 @@ impl< } } + /// correct scalar before mul; correction value is -2(1 + 2^w + ... + 2^{w(n-1)}) + fn correct_scalar( + &mut self, + window_size: usize, + ) -> Integer { + let window: usize = 1 << window_size; + let window_scalar = Emulated::Scalar::from(window as u64); + + let num_bits = Emulated::Scalar::NUM_BITS as usize; + let number_of_windows = (num_bits + window_size - 1) / window_size; + + let mut correction = Emulated::Scalar::ONE; + let mut power = window_scalar; + for _ in 0..number_of_windows - 1 { + correction += power; + power *= window_scalar; + } + correction += correction; + + Integer::from_fe(-correction, self.rns_scalar()) + } + /// Constraints to ensure `AssignedPoint` is on curve pub fn assert_is_on_curve( &self, @@ -415,13 +449,15 @@ mod tests { MainGate, MainGateConfig, RangeChip, RangeConfig, RangeInstructions, RegionCtx, }; use paste::paste; - use rand_core::OsRng; + use rand_chacha::ChaCha20Rng; + use rand_core::{OsRng, SeedableRng}; use crate::curves::bn256::{Fr as BnScalar, G1Affine as Bn256}; use crate::curves::pasta::{ EpAffine as Pallas, EqAffine as Vesta, Fp as PastaFp, Fq as PastaFq, }; use crate::curves::secp256k1::Secp256k1Affine as Secp256k1; + use crate::maingate::DimensionMeasurement; const NUMBER_OF_LIMBS: usize = 4; const BIT_LEN_LIMB: usize = 68; @@ -794,8 +830,7 @@ mod tests { let offset = 0; let ctx = &mut RegionCtx::new(region, offset); ecc_chip.assign_aux_generator(ctx, Value::known(self.aux_generator))?; - ecc_chip.assign_aux(ctx, self.window_size, 1)?; - ecc_chip.get_mul_aux(self.window_size, 1)?; + ecc_chip.assign_correction(ctx, self.window_size)?; Ok(()) }, )?; @@ -808,8 +843,11 @@ mod tests { let offset = 0; let ctx = &mut RegionCtx::new(region, offset); - let base = C::Curve::random(OsRng); - let s = C::Scalar::random(OsRng); + // let mut rng = ChaCha20Rng::seed_from_u64(80); + let mut rng = OsRng; + + let base = C::Curve::random(&mut rng); + let s = 
C::Scalar::random(&mut rng); let result = base * s; let s = Integer::from_fe(s, ecc_chip.rns_scalar()); @@ -842,8 +880,10 @@ mod tests { const NUMBER_OF_LIMBS: usize, const BIT_LEN_LIMB: usize, >() { - for window_size in 1..5 { - let aux_generator = C::Curve::random(OsRng).to_affine(); + for window_size in 2..5 { + // let mut rng = ChaCha20Rng::seed_from_u64(42); + let mut rng = OsRng; + let aux_generator = C::Curve::random(&mut rng).to_affine(); let circuit = TestEccMul:: { aux_generator, @@ -852,6 +892,11 @@ mod tests { }; let instance = vec![vec![]]; mock_prover_verify(&circuit, instance); + let dimension = DimensionMeasurement::measure(&circuit).unwrap(); + println!( + "window_size = {:?}, dimention: {:?}", + window_size, dimension + ); } } @@ -921,8 +966,7 @@ mod tests { let offset = 0; let ctx = &mut RegionCtx::new(region, offset); ecc_chip.assign_aux_generator(ctx, Value::known(self.aux_generator))?; - ecc_chip.assign_aux(ctx, self.window_size, self.number_of_pairs)?; - ecc_chip.get_mul_aux(self.window_size, self.number_of_pairs)?; + ecc_chip.assign_correction(ctx, self.window_size)?; Ok(()) }, )?; @@ -976,7 +1020,7 @@ mod tests { #[test] fn []() { for number_of_pairs in 5..7 { - for window_size in 1..3 { + for window_size in 2..4 { let aux_generator = <$C as PrimeCurveAffine>::Curve::random(OsRng).to_affine(); let circuit = TestEccBatchMul::<$C, $N, $NUMBER_OF_LIMBS, $BIT_LEN_LIMB> { diff --git a/ecc/src/general_ecc/mul.rs b/ecc/src/general_ecc/mul.rs index 7b5af700..e573cf99 100644 --- a/ecc/src/general_ecc/mul.rs +++ b/ecc/src/general_ecc/mul.rs @@ -14,58 +14,43 @@ impl< const BIT_LEN_LIMB: usize, > GeneralEccChip { - /// Pads scalar up to the next window_size mul - fn pad( - &self, - region: &mut RegionCtx<'_, N>, - bits: &mut Vec>, - window_size: usize, - ) -> Result<(), Error> { - assert_eq!(bits.len(), Emulated::ScalarExt::NUM_BITS as usize); - - // TODO: This is a tmp workaround. Instead of padding with zeros we can use a - // shorter ending window. 
- let padding_offset = (window_size - (bits.len() % window_size)) % window_size; - let zeros: Vec> = (0..padding_offset) - .map(|_| self.main_gate().assign_constant(region, N::ZERO)) - .collect::>()?; - bits.extend(zeros); - bits.reverse(); - - Ok(()) - } - /// Splits the bit representation of a scalar into windows fn window(bits: Vec>, window_size: usize) -> Windowed { - assert_eq!(bits.len() % window_size, 0); - let number_of_windows = bits.len() / window_size; - Windowed( - (0..number_of_windows) - .map(|i| { - let mut selector: Vec> = (0..window_size) - .map(|j| bits[i * window_size + j].clone()) - .collect(); - selector.reverse(); - Selector(selector) - }) - .collect(), - ) + let last = bits.len() % window_size; + let num = bits.len() / window_size; + + let mut windows: Vec<_> = (0..num) + .map(|i| { + let k = i * window_size; + Selector(bits[k..k + window_size].to_vec()) + }) + .collect(); + + if last != 0 { + let last_start = bits.len() - last; + windows.push(Selector(bits[last_start..].to_vec())); + } + + windows.reverse(); + + Windowed(windows) } /// Constructs table for efficient multiplication algorithm /// The table contains precomputed point values that allow to trade /// additions for selections + /// [2]P, [3]P, ..., [2^w + 1]P fn make_incremental_table( &self, - region: &mut RegionCtx<'_, N>, - aux: &AssignedPoint, + ctx: &mut RegionCtx<'_, N>, point: &AssignedPoint, window_size: usize, ) -> Result, Error> { let table_size = 1 << window_size; - let mut table = vec![aux.clone()]; + let double = self.double(ctx, point)?; + let mut table = vec![double]; for i in 0..(table_size - 1) { - table.push(self.add(region, &table[i], point)?); + table.push(self.add(ctx, &table[i], point)?); } Ok(Table(table)) } @@ -96,33 +81,55 @@ impl< /// Performed with the sliding-window algorithm pub fn mul( &self, - region: &mut RegionCtx<'_, N>, + ctx: &mut RegionCtx<'_, N>, point: &AssignedPoint, scalar: &AssignedInteger, window_size: usize, ) -> Result, Error> { - 
assert!(window_size > 0); - let aux = self.get_mul_aux(window_size, 1)?; + assert!(window_size > 1); + let num_bits = Emulated::Scalar::NUM_BITS as usize; + let number_of_windows = (num_bits + window_size - 1) / window_size; + let mut last = num_bits % window_size; + if last == 0 { + last = window_size; + } + let window_last: usize = 1 << last; let scalar_chip = self.scalar_field_chip(); - let decomposed = &mut scalar_chip.decompose(region, scalar)?; - self.pad(region, decomposed, window_size)?; - let windowed = Self::window(decomposed.to_vec(), window_size); - let table = &self.make_incremental_table(region, &aux.to_add, point, window_size)?; - let mut acc = self.select_multi(region, &windowed.0[0], table)?; - acc = self.double_n(region, &acc, window_size)?; + let (scalar_correction, aux) = self.get_mul_correction(window_size)?; + let scalar_adjusted = &scalar_chip.add_constant(ctx, scalar, &scalar_correction)?; + let scalar_reduced = &scalar_chip.reduce(ctx, scalar_adjusted)?; + let decomposed = scalar_chip.decompose(ctx, scalar_reduced)?; + let windowed = Self::window(decomposed, window_size); + + let table = &self.make_incremental_table(ctx, point, window_size)?; + let last_table = &Table(table.0[0..window_last].to_vec()); + let mut acc = self.select_multi(ctx, &windowed.0[0], last_table)?; - let to_add = self.select_multi(region, &windowed.0[1], table)?; - acc = self.add(region, &acc, &to_add)?; + acc = self.double_n(ctx, &acc, window_size)?; + let q = self.select_multi(ctx, &windowed.0[1], table)?; + acc = self._add_incomplete_unsafe(ctx, &acc, &q)?; - for selector in windowed.0.iter().skip(2) { - acc = self.double_n(region, &acc, window_size - 1)?; - let to_add = self.select_multi(region, selector, table)?; - acc = self.ladder(region, &acc, &to_add)?; + for i in 2..number_of_windows - 2 { + acc = self.double_n(ctx, &acc, window_size - 1)?; + let q = self.select_multi(ctx, &windowed.0[i], table)?; + acc = self._ladder_incomplete(ctx, &acc, &q)?; } - 
self.add(region, &acc, &aux.to_sub) + // The last two rows use auxiliary generator + // aux_1 = (2^w aux_2 + aux_generator) + Q_1 + // aux_0 = 2^w aux_1 + Q_0 - 2^w aux_generator + acc = self.double_n(ctx, &acc, window_size)?; + acc = self.add(ctx, &acc, &aux.to_add)?; + let q1 = self.select_multi(ctx, &windowed.0[number_of_windows - 2], table)?; + acc = self.add(ctx, &acc, &q1)?; + + acc = self.double_n(ctx, &acc, window_size)?; + let q0 = self.select_multi(ctx, &windowed.0[number_of_windows - 1], table)?; + acc = self.add(ctx, &acc, &q0)?; + + self.add(ctx, &acc, &aux.to_sub) } /// Computes multi-product @@ -134,68 +141,82 @@ impl< #[allow(clippy::type_complexity)] pub fn mul_batch_1d_horizontal( &self, - region: &mut RegionCtx<'_, N>, + ctx: &mut RegionCtx<'_, N>, pairs: Vec<( AssignedPoint, AssignedInteger, )>, window_size: usize, ) -> Result, Error> { - assert!(window_size > 0); + assert!(window_size > 1); assert!(!pairs.is_empty()); - let aux = self.get_mul_aux(window_size, pairs.len())?; + + let num_bits = Emulated::Scalar::NUM_BITS as usize; + let mut last = num_bits % window_size; + if last == 0 { + last = window_size; + } + let window_last: usize = 1 << last; let scalar_chip = self.scalar_field_chip(); + let (scalar_correction, aux) = self.get_mul_correction(window_size)?; + // 1. Decompose scalars in bits - let mut decomposed_scalars: Vec>> = pairs + let decomposed_scalars: Vec>> = pairs .iter() - .map(|(_, scalar)| scalar_chip.decompose(region, scalar)) + .map(|(_, scalar)| { + let scalar_adjusted = &scalar_chip.add_constant(ctx, scalar, &scalar_correction)?; + let scalar_reduced = &scalar_chip.reduce(ctx, scalar_adjusted)?; + scalar_chip.decompose(ctx, scalar_reduced) + }) .collect::>()?; - // 2. Pad scalars bit representations - for decomposed in decomposed_scalars.iter_mut() { - self.pad(region, decomposed, window_size)?; - } - - // 3. Split scalar bits into windows + // 2. 
Split scalar bits into windows let windowed_scalars: Vec> = decomposed_scalars .into_iter() .map(|decomposed| Self::window(decomposed, window_size)) .collect(); let number_of_windows = windowed_scalars[0].0.len(); - let mut binary_aux = aux.to_add.clone(); let tables: Vec> = pairs .iter() - .enumerate() - .map(|(i, (point, _))| { - let table = self.make_incremental_table(region, &binary_aux, point, window_size); - if i != pairs.len() - 1 { - binary_aux = self.double(region, &binary_aux)?; - } - table - }) + .map(|(point, _)| self.make_incremental_table(ctx, point, window_size)) .collect::>()?; - // preparation for the first round - // initialize accumulator - let mut acc = self.select_multi(region, &windowed_scalars[0].0[0], &tables[0])?; + let last_table = &Table(tables[0].0[0..window_last].to_vec()); + let mut acc = self.select_multi(ctx, &windowed_scalars[0].0[0], last_table)?; // add first contributions other point scalar for (table, windowed) in tables.iter().skip(1).zip(windowed_scalars.iter().skip(1)) { + let last_table = &Table(table.0[0..window_last].to_vec()); let selector = &windowed.0[0]; - let to_add = self.select_multi(region, selector, table)?; - acc = self.add(region, &acc, &to_add)?; + let q = self.select_multi(ctx, selector, last_table)?; + acc = self.add(ctx, &acc, &q)?; } - for i in 1..number_of_windows { - acc = self.double_n(region, &acc, window_size)?; + for i in 1..number_of_windows - 2 { + acc = self.double_n(ctx, &acc, window_size)?; for (table, windowed) in tables.iter().zip(windowed_scalars.iter()) { let selector = &windowed.0[i]; - let to_add = self.select_multi(region, selector, table)?; - acc = self.add(region, &acc, &to_add)?; + let q = self.select_multi(ctx, selector, table)?; + acc = self.add(ctx, &acc, &q)?; } } - self.add(region, &acc, &aux.to_sub) + acc = self.double_n(ctx, &acc, window_size)?; + acc = self.add(ctx, &acc, &aux.to_add)?; + for (table, windowed) in tables.iter().zip(windowed_scalars.iter()) { + let selector = 
&windowed.0[number_of_windows - 2]; + let q = self.select_multi(ctx, selector, table)?; + acc = self.add(ctx, &acc, &q)?; + } + + acc = self.double_n(ctx, &acc, window_size)?; + for (table, windowed) in tables.iter().zip(windowed_scalars.iter()) { + let selector = &windowed.0[number_of_windows - 1]; + let q = self.select_multi(ctx, selector, table)?; + acc = self.add(ctx, &acc, &q)?; + } + + self.add(ctx, &acc, &aux.to_sub) } } diff --git a/ecdsa/src/ecdsa.rs b/ecdsa/src/ecdsa.rs index eac14d62..d7b7891f 100644 --- a/ecdsa/src/ecdsa.rs +++ b/ecdsa/src/ecdsa.rs @@ -248,7 +248,7 @@ mod tests { let ctx = &mut RegionCtx::new(region, offset); ecc_chip.assign_aux_generator(ctx, Value::known(self.aux_generator))?; - ecc_chip.assign_aux(ctx, self.window_size, 2)?; + ecc_chip.assign_correction(ctx, self.window_size)?; Ok(()) }, )?; From c75195be1f6499d6e7f71bbff15419acbbdb0079 Mon Sep 17 00:00:00 2001 From: Jia Liu <58184672+kitounliu@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:39:21 +0100 Subject: [PATCH 3/3] Create README.md --- ecc/README.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 ecc/README.md diff --git a/ecc/README.md b/ecc/README.md new file mode 100644 index 00000000..0a2498c3 --- /dev/null +++ b/ecc/README.md @@ -0,0 +1,45 @@ +### Windowed scalar mul using auxiliary generator + +$$ +\begin{align} +0 &~~~~~~~ 1: ~~~~~~~~~~ [2]P ~~ [3]P ~~ \cdots~\cdots ~~~ [2^w +1]P \\ +1 &~~~~~~ 2^w: ~~~~~~~~~ [2]P ~~ [3]P ~~ \cdots~\cdots ~~~ [2^w +1]P \\ +2 &~~~~ (2^w)^2: ~~~~~~ [2]P ~~ [3]P ~~ \cdots~\cdots ~~~ [2^w +1]P \\ +\cdots & \cdots \\ +n-3 & ~~~~ (2^w)^{n-3}: ~~ [2]P ~~ [3]P ~~ \cdots~\cdots ~~~ [2^w +1]P \\ +n-2 & ~~~~ (2^w)^{n-2}: ~~ [2]P ~~ [3]P ~~ \cdots~\cdots ~~~ [2^w +1]P \\ +n-1 & ~~~~ (2^w)^{n-1}: ~~ [2]P ~~ [3]P ~~ \cdots ~~~ [2^\ell +1]P \\ +\end{align} +$$ + +where window_size $w>1$ and scalar_size $= w(n-1) + \ell$ with $1\leq \ell \leq w$. 
+ +The scalar $k\in F_r$ can be adjusted upfront $k' = k - (2*\sum_{0\leq j\leq n-1} 2^{wj}) \mod r$ to avoid computing +correction point $[\sum_{0\leq j\leq n-1}2* 2^{wj}]P$. This works for both base_field_chip and general_ecc_chip. + +The accumulation $acc_i$ is computed from the bottom up: + +$$ +\begin{align} +acc_{n-1} & = Q_{n-1} \\ +acc_{n-2} & = 2^w acc_{n-1} + Q_{n-2} \\ +acc_i & = 2^w acc_{i+1} + Q_i \\ +& = 2(2^{w-1} acc_{i+1}) + Q_i \text{ for } i = n-3,...,2 +\end{align} +$$ + +The scalar in $acc_{n-1},\dots, acc_2$ increases monotonically, and $acc_{n-3}...acc_2$ can be computed using `_ladder_incomplete`. +The last two steps $acc_{1}, acc_0$ might overflow (when $\ell = 1$ and $w = 2$) +and need to use the auxiliary generator and addition with assertions to ensure the x-coordinates are not the same: + +$$ +\begin{align} +acc_1 & = (2^{w} acc_2 + aux) + Q_1\\ +acc_0 & = (2^w acc_1 + Q_0) - 2^w aux +\end{align} +$$ + +### mul_batch_1d_horizontal +This algorithm uses addition with assertions in all steps and the auxiliary generator in the last two steps. It is only suitable for computing +$e_1 P_1 + e_2 P_2 + \cdots + e_n P_n$, where $P_1, \dots, P_n$ are randomly chosen, +i.e., their discrete logarithms are unknown. The algorithm is not suitable for computing things like $eP + sP$.