diff --git a/Cargo.lock b/Cargo.lock index 6b54012e61b..74216eefa12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1243,6 +1243,11 @@ dependencies = [ "generic-array 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "smallvec" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "strsim" version = "0.8.0" @@ -1639,6 +1644,7 @@ dependencies = [ "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "rand_chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)", "uucore 0.0.4 (git+https://github.com/uutils/uucore.git?branch=canary)", "uucore_procs 0.0.4 (git+https://github.com/uutils/uucore.git?branch=canary)", ] @@ -2682,6 +2688,7 @@ dependencies = [ "checksum sha1 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" "checksum sha2 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d963c78ce367df26d7ea8b8cc655c651b42e8a1e584e869c1e17dae3ccb116a" "checksum sha3 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "26405905b6a56a94c60109cfda62610507ac14a65be531f5767dec5c5a8dd6a0" +"checksum smallvec 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" "checksum syn 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)" = "cc371affeffc477f42a221a1e4297aedcea33d47d19b61455588bd9d8f6b19ac" diff --git a/Cargo.toml b/Cargo.toml index 7a5965107a8..bcf9a76156b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -329,10 +329,12 @@ pin_same-file = { version="1.0.4, < 1.0.6", package="same-file" } ## same-file v pin_winapi-util = { version="0.1.2, < 0.1.3", package="winapi-util" } ## winapi-util v0.1.3 has compiler errors for MinRustV v1.32.0, expects 1.34 [dev-dependencies] +conv = "0.3" filetime = "0.2" libc = "0.2" -rand = "0.6" +rand = "0.7" regex = "1.0" +sha1 = { version="0.6", features=["std"] } tempfile = "3.1" time = "0.1" unindent = "0.1" diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index c4dbc96ccc6..a37ee524952 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -18,6 +18,7 @@ num-traits = "0.2" # used in src/numerics.rs, which is included by build.rs [dependencies] num-traits = "0.2" rand = { version="0.7", features=["small_rng"] } +smallvec = { version="0.6.13, < 1.0" } uucore = { version="0.0.4", package="uucore", git="https://github.com/uutils/uucore.git", branch="canary" } uucore_procs = { version="0.0.4", package="uucore_procs", git="https://github.com/uutils/uucore.git", branch="canary" } diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 4e5322084cb..a7dc4fa2e24 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -7,33 +7,44 @@ extern crate rand; +use smallvec::SmallVec; use std::cell::RefCell; use std::fmt; -use crate::numeric::{Arithmetic, Montgomery}; +use crate::numeric::{gcd, Arithmetic, Montgomery}; use crate::{miller_rabin, rho, table}; type Exponent = u8; #[derive(Clone, Debug)] -struct Decomposition(Vec<(u64, Exponent)>); +struct Decomposition(SmallVec<[(u64, Exponent); NUM_FACTORS_INLINE]>); + +// The number of factors to inline directly into a `Decomposition` object. +// As a consequence of the Erdős–Kac theorem, the average number of prime factors +// of integers < 10²⁵ ≃ 2⁸³ is 4, so we can use a slightly higher value. +const NUM_FACTORS_INLINE: usize = 5; impl Decomposition { fn one() -> Decomposition { - Decomposition(Vec::new()) + Decomposition(SmallVec::new()) } fn add(&mut self, factor: u64, exp: Exponent) { debug_assert!(exp > 0); + // Assert the factor doesn't already exist in the Decomposition object + debug_assert_eq!(self.0.iter_mut().find(|(f, _)| *f == factor), None); - if let Some((_, e)) = self.0.iter_mut().find(|(f, _)| *f == factor) { - *e += exp; - } else { - self.0.push((factor, exp)) - } + self.0.push((factor, exp)) + } + + fn is_one(&self) -> bool { + self.0.is_empty() + } + + fn pop(&mut self) -> Option<(u64, Exponent)> { + self.0.pop() } - #[cfg(test)] fn product(&self) -> u64 { self.0 .iter() @@ -77,11 +88,11 @@ impl Factors { self.0.borrow_mut().add(prime, exp) } + #[cfg(test)] pub fn push(&mut self, prime: u64) { self.add(prime, 1) } - #[cfg(test)] fn product(&self) -> u64 { self.0.borrow().product() } @@ -102,62 +113,116 @@ impl fmt::Display for Factors { } } -fn _factor(num: u64, f: Factors) -> Factors { +fn _find_factor(num: u64) -> Option { use miller_rabin::Result::*; - // Shadow the name, so the recursion automatically goes from “Big” arithmetic to small. - let _factor = |n, f| { - if n < (1 << 32) { - _factor::>(n, f) - } else { - _factor::(n, f) - } - }; - - if num == 1 { - return f; - } - let n = A::new(num); - let divisor = match miller_rabin::test::(n) { - Prime => { - let mut r = f; - r.push(num); - return r; - } - - Composite(d) => d, - Pseudoprime => rho::find_divisor::(n), - }; + match miller_rabin::test::(n) { + Prime => None, + Composite(d) => Some(d), + Pseudoprime => Some(rho::find_divisor::(n)), + } +} - let f = _factor(divisor, f); - _factor(num / divisor, f) +fn find_factor(num: u64) -> Option { + if num < (1 << 32) { + _find_factor::>(num) + } else { + _find_factor::>(num) + } } -pub fn factor(mut n: u64) -> Factors { +pub fn factor(num: u64) -> Factors { let mut factors = Factors::one(); - if n < 2 { + if num < 2 { return factors; } - let z = n.trailing_zeros(); - if z > 0 { - factors.add(2, z as Exponent); - n >>= z; + let mut n = num; + let n_zeros = num.trailing_zeros(); + if n_zeros > 0 { + factors.add(2, n_zeros as Exponent); + n >>= n_zeros; } + debug_assert_eq!(num, n * factors.product()); if n == 1 { return factors; } - let (factors, n) = table::factor(n, factors); + table::factor(&mut n, &mut factors); + debug_assert_eq!(num, n * factors.product()); - if n < (1 << 32) { - _factor::>(n, factors) - } else { - _factor::>(n, factors) + if n == 1 { + return factors; } + + let mut dec = Decomposition::one(); + dec.add(n, 1); + + while !dec.is_one() { + // Check correctness invariant + debug_assert_eq!(num, factors.product() * dec.product()); + + let (factor, exp) = dec.pop().unwrap(); + + if let Some(divisor) = find_factor(factor) { + let mut gcd_queue = Decomposition::one(); + + let quotient = factor / divisor; + let mut trivial_gcd = quotient == divisor; + if trivial_gcd { + gcd_queue.add(divisor, exp + 1); + } else { + gcd_queue.add(divisor, exp); + gcd_queue.add(quotient, exp); + } + + while !trivial_gcd { + debug_assert_eq!(factor, gcd_queue.product()); + + let mut tmp = Decomposition::one(); + trivial_gcd = true; + for i in 0..gcd_queue.0.len() - 1 { + let (mut a, exp_a) = gcd_queue.0[i]; + let (mut b, exp_b) = gcd_queue.0[i + 1]; + + if a == 1 { + continue; + } + + let g = gcd(a, b); + if g != 1 { + trivial_gcd = false; + a /= g; + b /= g; + } + if a != 1 { + tmp.add(a, exp_a); + } + if g != 1 { + tmp.add(g, exp_a + exp_b); + } + + if i + 1 != gcd_queue.0.len() - 1 { + gcd_queue.0[i + 1].0 = b; + } else if b != 1 { + tmp.add(b, exp_b); + } + } + gcd_queue = tmp; + } + + debug_assert_eq!(factor, gcd_queue.product()); + dec.0.extend(gcd_queue.0); + } else { + // factor is prime + factors.add(factor, exp); + } + } + + factors } #[cfg(test)] @@ -165,6 +230,19 @@ mod tests { use super::{factor, Factors}; use quickcheck::quickcheck; + #[test] + fn factor_correctly_recombines_prior_test_failures() { + let prior_failures = [ + // * integers with duplicate factors (ie, N.pow(M)) + 4566769_u64, // == 2137.pow(2) + 2044854919485649_u64, + 18446739546814299361_u64, + 18446738440860217487_u64, + 18446736729316206481_u64, + ]; + assert!(prior_failures.iter().all(|i| factor(*i).product() == *i)); + } + #[test] fn factor_recombines_small() { assert!((1..10_000) @@ -172,6 +250,15 @@ mod tests { .all(|i| factor(i).product() == i)); } + #[test] + fn factor_recombines_small_squares() { + // factor(18446736729316206481) == 4294966441 ** 2 ; causes debug_assert fault for repeated decomposition factor in add() + // ToDO: explain/combine with factor_18446736729316206481 and factor_18446739546814299361 tests + assert!((1..10_000) + .map(|i| (2 * i + 1) * (2 * i + 1)) + .all(|i| factor(i).product() == i)); + } + #[test] fn factor_recombines_overflowing() { assert!((0..250) @@ -182,7 +269,7 @@ mod tests { #[test] fn factor_recombines_strong_pseudoprime() { // This is a strong pseudoprime (wrt. miller_rabin::BASIS) - // and triggered a bug in rho::factor's codepath handling + // and triggered a bug in rho::factor's code path handling // miller_rabbin::Result::Composite let pseudoprime = 17179869183; for _ in 0..20 { @@ -213,7 +300,7 @@ impl quickcheck::Arbitrary for Factors { let mut n = u64::MAX; // Adam Kalai's algorithm for generating uniformly-distributed - // integers and their factorisation. + // integers and their factorization. // // See Generating Random Factored Numbers, Easily, J. Cryptology (2003) 'attempt: loop { diff --git a/src/uu/factor/src/numeric/gcd.rs b/src/uu/factor/src/numeric/gcd.rs index 01e4a23bd49..96a0366a217 100644 --- a/src/uu/factor/src/numeric/gcd.rs +++ b/src/uu/factor/src/numeric/gcd.rs @@ -79,7 +79,11 @@ mod tests { fn divisor(a: u64, b: u64) -> bool { // Test that gcd(a, b) divides a and b let g = gcd(a, b); - (g != 0 && a % g == 0 && b % g == 0) || (g == 0 && a == 0 && b == 0) + if g != 0 { + a % g == 0 && b % g == 0 + } else { + a == 0 && b == 0 // for g == 0 + } } fn commutative(a: u64, b: u64) -> bool { diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index d6ef796fc03..b62e801cbc7 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -14,7 +14,8 @@ use crate::Factors; include!(concat!(env!("OUT_DIR"), "/prime_table.rs")); -pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { +pub(crate) fn factor(n: &mut u64, factors: &mut Factors) { + let mut num = *n; for &(prime, inv, ceil) in P_INVS_U64 { if num == 1 { break; @@ -42,5 +43,5 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { } } - (factors, num) + *n = num; } diff --git a/tests/by-util/test_factor.rs b/tests/by-util/test_factor.rs index 358f38fd3ac..5bde17cdbe6 100644 --- a/tests/by-util/test_factor.rs +++ b/tests/by-util/test_factor.rs @@ -6,34 +6,64 @@ // that was distributed with this source code. use crate::common::util::*; +use std::time::SystemTime; #[path = "../../src/uu/factor/sieve.rs"] mod sieve; -use self::sieve::Sieve; +extern crate conv; extern crate rand; + use self::rand::distributions::{Distribution, Uniform}; -use self::rand::{rngs::SmallRng, FromEntropy, Rng}; +use self::rand::{rngs::SmallRng, Rng, SeedableRng}; +use self::sieve::Sieve; const NUM_PRIMES: usize = 10000; -const LOG_PRIMES: f64 = 14.0; // ceil(log2(NUM_PRIMES)) - const NUM_TESTS: usize = 100; +#[test] +fn test_first_100000_integers() { + extern crate sha1; + + let n_integers = 100_000; + let mut instring = String::new(); + for i in 0..=n_integers { + instring.push_str(&(format!("{} ", i))[..]); + } + + println!("STDIN='{}'", instring); + let result = new_ucmd!().pipe_in(instring.as_bytes()).run(); + let stdout = result.stdout; + + assert!(result.success); + + // `seq 0 100000 | factor | sha1sum` => "4ed2d8403934fa1c76fe4b84c5d4b8850299c359" + let hash_check = sha1::Sha1::from(stdout.as_bytes()).hexdigest(); + assert_eq!(hash_check, "4ed2d8403934fa1c76fe4b84c5d4b8850299c359"); +} + #[test] fn test_random() { + use conv::prelude::*; + + let log_num_primes = f64::value_from(NUM_PRIMES).unwrap().log2().ceil(); let primes = Sieve::primes().take(NUM_PRIMES).collect::>(); - let mut rng = SmallRng::from_entropy(); - println!("(seed) rng={:?}", rng); + let rng_seed = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + println!("rng_seed={:?}", rng_seed); + let mut rng = SmallRng::seed_from_u64(rng_seed); + let mut rand_gt = move |min: u64| { - let mut product = 1u64; + let mut product = 1_u64; let mut factors = Vec::new(); while product < min { // log distribution---higher probability for lower numbers let factor; loop { - let next = rng.gen_range(0f64, LOG_PRIMES).exp2().floor() as usize; + let next = rng.gen_range(0_f64, log_num_primes).exp2().floor() as usize; if next < NUM_PRIMES { factor = primes[next]; break; @@ -73,9 +103,14 @@ fn test_random() { #[test] fn test_random_big() { - let mut rng = SmallRng::from_entropy(); - println!("(seed) rng={:?}", rng); - let bitrange_1 = Uniform::new(14usize, 51); + let rng_seed = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + println!("rng_seed={:?}", rng_seed); + let mut rng = SmallRng::seed_from_u64(rng_seed); + + let bitrange_1 = Uniform::new(14_usize, 51); let mut rand_64 = move || { // first, choose a random number of bits for the first factor let f_bit_1 = bitrange_1.sample(&mut rng); @@ -85,11 +120,11 @@ fn test_random_big() { // we will have a number of additional factors equal to nfacts + 1 // where nfacts is in [0, floor(rem/14) ) NOTE half-open interval // Each prime factor is at least 14 bits, hence floor(rem/14) - let nfacts = Uniform::new(0usize, rem / 14).sample(&mut rng); + let nfacts = Uniform::new(0_usize, rem / 14).sample(&mut rng); // we have to distribute extrabits among the (nfacts + 1) values let extrabits = rem - (nfacts + 1) * 14; // (remember, a Range is a half-open interval) - let extrarange = Uniform::new(0usize, extrabits + 1); + let extrarange = Uniform::new(0_usize, extrabits + 1); // to generate an even split of this range, generate n-1 random elements // in the range, add the desired total value to the end, sort this list, @@ -116,7 +151,7 @@ fn test_random_big() { let f_bits = f_bits; let mut nbits = 0; - let mut product = 1u64; + let mut product = 1_u64; let mut factors = Vec::new(); for bit in f_bits { assert!(bit < 37); @@ -161,6 +196,8 @@ fn test_big_primes() { } fn run(instring: &[u8], outstring: &[u8]) { + println!("STDIN='{}'", String::from_utf8_lossy(instring)); + println!("STDOUT(expected)='{}'", String::from_utf8_lossy(outstring)); // now run factor new_ucmd!() .pipe_in(instring)