diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
index 12fefb96..19ad05a8 100644
--- a/include/intx/intx.hpp
+++ b/include/intx/intx.hpp
@@ -1943,6 +1943,24 @@ inline constexpr uint& operator>>=(uint& x, const T& y) noexcept
 
 inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) noexcept
 {
+    // Fast path for mod >= 2^192 (mod[3] != 0), with x and y at most slightly bigger than mod
+    // (x[3] <= mod[3] and y[3] <= mod[3]). This is always the case when x and y are already
+    // reduced modulo mod. Based on https://github.com/holiman/uint256/pull/86.
+    if ((mod[3] != 0) && (x[3] <= mod[3]) && (y[3] <= mod[3]))
+    {
+        auto s = sub_with_carry(x, mod);  // tentative x - mod
+        if (s.carry)
+            s.value = x;  // the subtraction reported a carry: keep x itself
+
+        auto t = sub_with_carry(y, mod);  // same conditional reduction for y
+        if (t.carry)
+            t.value = y;
+
+        s = add_with_carry(s.value, t.value);  // sum of the two reduced operands
+        t = sub_with_carry(s.value, mod);  // tentative sum - mod
+        return (s.carry || !t.carry) ? t.value : s.value;  // sum - mod if the add carried or the sub did not
+    }
+
     const auto s = add_with_carry(x, y);
     uint<256 + 64> n = s.value;
     n[4] = s.carry;
diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt
index 8a712563..0816fbcd 100644
--- a/test/benchmarks/CMakeLists.txt
+++ b/test/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
 # intx: extended precision integer library.
-# Copyright 2019-2020 Pawel Bylica.
+# Copyright 2019 Pawel Bylica.
 # Licensed under the Apache License, Version 2.0.
 
 hunter_add_package(benchmark)
@@ -8,6 +8,7 @@ find_package(benchmark CONFIG REQUIRED)
 find_package(GMP REQUIRED)
 
 add_executable(intx-bench
+    ../experimental/addmod.hpp  # experimental addmod variants, listed with the benchmark sources
     bench_builtins.cpp
     bench_div.cpp
     bench_int128.cpp
diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp
index b2a590ea..8b73eb38 100644
--- a/test/benchmarks/benchmarks.cpp
+++ b/test/benchmarks/benchmarks.cpp
@@ -1,9 +1,9 @@
 // intx: extended precision integer library.
-// Copyright 2019-2020 Pawel Bylica.
+// Copyright 2019 Pawel Bylica.
 // Licensed under the Apache License, Version 2.0.
 
+#include "../experimental/addmod.hpp"
 #include
-#include
 #include
 #include
 #include
@@ -89,8 +89,14 @@ static void mod(benchmark::State& state)
         }
     }
 }
-BENCHMARK_TEMPLATE(mod, addmod)->DenseRange(64, 256, 64);
-BENCHMARK_TEMPLATE(mod, mulmod)->DenseRange(64, 256, 64);
+#define ARGS DenseRange(64, 256, 64)  // argument range shared by the registrations below
+BENCHMARK_TEMPLATE(mod, addmod)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_public)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_simple)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_prenormalize)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_daosvik)->ARGS;
+BENCHMARK_TEMPLATE(mod, mulmod)->ARGS;
+#undef ARGS  // keep the helper macro local to this group of registrations
 
 template
 static void ecmod(benchmark::State& state)
@@ -109,7 +115,10 @@ static void ecmod(benchmark::State& state)
         }
     }
 }
-BENCHMARK_TEMPLATE(ecmod, addmod);
+BENCHMARK_TEMPLATE(ecmod, addmod_public);  // replaces the former direct addmod registration
+BENCHMARK_TEMPLATE(ecmod, addmod_simple);
+BENCHMARK_TEMPLATE(ecmod, addmod_prenormalize);
+BENCHMARK_TEMPLATE(ecmod, addmod_daosvik);
 BENCHMARK_TEMPLATE(ecmod, mulmod);
 
 
diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp
new file mode 100644
index 00000000..7f8daf00
--- /dev/null
+++ b/test/experimental/addmod.hpp
@@ -0,0 +1,64 @@
+// intx: extended precision integer library.
+// Copyright 2021 Pawel Bylica.
+// Licensed under the Apache License, Version 2.0.
+#pragma once
+
+#include  // NOTE(review): header name is missing here (presumably <intx/intx.hpp>) - confirm
+
+namespace intx::test
+{
+[[maybe_unused, gnu::noinline]] static uint256 addmod_public(
+    const uint256& x, const uint256& y, const uint256& mod) noexcept
+{
+    return addmod(x, y, mod);  // non-inlined wrapper: just forwards to addmod()
+}
+
+[[maybe_unused, gnu::noinline]] static uint256 addmod_simple(
+    const uint256& x, const uint256& y, const uint256& mod) noexcept
+{
+    const auto s = add_with_carry(x, y);
+    uint<256 + 64> n = s.value;  // widen the sum by one extra 64-bit word
+    n[4] = s.carry;  // the carry of the addition becomes the extra top word
+    return udivrem(n, mod).rem;  // remainder of the widened sum divided by mod
+}
+
+[[maybe_unused, gnu::noinline]] static uint256 addmod_prenormalize(
+    const uint256& x, const uint256& y, const uint256& mod) noexcept
+{
+    const auto xm = x >= mod ? x % mod : x;  // reduce only when x is not already below mod
+    const auto ym = y >= mod ? y % mod : y;  // same for y
+
+    const auto s = add_with_carry(xm, ym);
+    auto sum = s.value;
+    if (s.carry || s.value >= mod)  // the sum carried out of 256 bits or is not below mod
+        sum -= mod;  // a single subtraction of mod
+    return sum;
+}
+
+[[maybe_unused, gnu::noinline]] static uint256 addmod_daosvik(
+    const uint256& x, const uint256& y, const uint256& m) noexcept
+{
+    // Fast path for m >= 2^192 (m[3] != 0), with x and y at most slightly bigger than m
+    // (x[3] <= m[3] and y[3] <= m[3]). This is always the case when x and y are already
+    // reduced modulo m. Based on https://github.com/holiman/uint256/pull/86.
+    if ((m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3]))
+    {
+        auto s = sub_with_carry(x, m);  // tentative x - m
+        if (s.carry)
+            s.value = x;  // the subtraction reported a carry: keep x itself
+
+        auto t = sub_with_carry(y, m);  // same conditional reduction for y
+        if (t.carry)
+            t.value = y;
+
+        s = add_with_carry(s.value, t.value);  // sum of the two reduced operands
+        t = sub_with_carry(s.value, m);  // tentative sum - m
+        return (s.carry || !t.carry) ? t.value : s.value;  // sum - m if the add carried or the sub did not
+    }
+
+    const auto s = add_with_carry(x, y);  // general path: same steps as addmod_simple
+    uint<256 + 64> n = s.value;
+    n[4] = s.carry;
+    return udivrem(n, m).rem;
+}
+}  // namespace intx::test
diff --git a/test/fuzzer/opmod_fuzz.cpp b/test/fuzzer/opmod_fuzz.cpp
index 1db88623..bb37da9d 100644
--- a/test/fuzzer/opmod_fuzz.cpp
+++ b/test/fuzzer/opmod_fuzz.cpp
@@ -2,6 +2,7 @@
 // Copyright 2020 Pawel Bylica.
 // Licensed under the Apache License, Version 2.0.
 
+#include "../experimental/addmod.hpp"
 #include "../utils/gmp.hpp"
 #include
 #include
@@ -11,7 +12,7 @@ constexpr size_t input_size = 3 * sizeof(intx::uint256);
 
 std::ostream& operator<<(std::ostream& os, const intx::uint256& x)
 {
-    return os << to_string(x, 16);
+    return os << "0x" << to_string(x, 16);  // base-16 digits with an explicit 0x prefix
 }
 
 extern "C" size_t LLVMFuzzerMutate(uint8_t* data, size_t size, size_t max_size);
@@ -26,6 +27,14 @@ extern "C" size_t LLVMFuzzerCustomMutator(
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size) noexcept
 {
+    static constexpr decltype(&intx::addmod) addmod_fns[] = {  // every implementation checked below
+        intx::addmod,
+        intx::test::addmod_public,
+        intx::test::addmod_simple,
+        intx::test::addmod_prenormalize,
+        intx::test::addmod_daosvik,
+    };
+
     if (data_size < input_size)
         return 0;
 
@@ -37,14 +46,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size) noe
     if (m == 0)
         return 0;
 
-    const auto r = intx::addmod(a, b, m);
-    const auto e = intx::gmp::addmod(a, b, m);
-
-    if (INTX_UNLIKELY(r != e))
+    const auto expected = intx::gmp::addmod(a, b, m);  // reference value, computed once per input
+    for (size_t i = 0; i < std::size(addmod_fns); ++i)  // compare each implementation against it
     {
-        std::cerr << "FAILED:\n " << a << " + " << b << " mod " << m << "\n result: " << r
-                  << "\n expected: " << e << "\n";
-        __builtin_trap();
+        const auto result = addmod_fns[i](a, b, m);
+
+        if (INTX_UNLIKELY(result != expected))
+        {
+            std::cerr << "FAILED: [" << i << "]\n " << a << " + " << b << " mod " << m
+                      << "\n result: " << result << "\n expected: " << expected << "\n";
+            __builtin_trap();  // trap on the first mismatch; [i] in the report is the table index
+        }
     }
 
     return 0;
diff --git a/test/unittests/test_intx.cpp b/test/unittests/test_intx.cpp
index f4d3e79c..c72e4fcb 100644
--- a/test/unittests/test_intx.cpp
+++ b/test/unittests/test_intx.cpp
@@ -2,6 +2,7 @@
 // Copyright 2019 Pawel Bylica.
 // Licensed under the Apache License, Version 2.0.
 
+#include "../experimental/addmod.hpp"
 #include "test_cases.hpp"
 #include "test_suite.hpp"
 
@@ -164,15 +165,44 @@ TEST(uint256, exp)
         83674153047243082998136072363356897816464308069321161820168341056719375264851_u256);
 }
 
+static constexpr decltype(&addmod) addmod_impls[] = {  // read-only table of all addmod variants under test
+    addmod,
+    test::addmod_public,
+    test::addmod_simple,
+    test::addmod_prenormalize,
+    test::addmod_daosvik,
+};
+
 TEST(uint256, addmod)
 {
-    const auto x = 0xab0f4afc4c78548d4c30e1ab3449e3_u128;
-    const auto y = 0xf0a4485af15508e448cdddb0d1301664_u128;
-    const auto mod = 0xf0f9d0006f7b450e8f73f621a6ca3b56_u128;
-    EXPECT_EQ(addmod(x, y, mod), 0x5587a57e263c2a46a61870d59a24f1_u128);
-    const auto a = 0xdce049946eccbbf77ed1e8e2a3c89e15a8e897df2194150700f5096dea864cdb_u256;
-    const auto b = 0x397dd0df188eaffbf5216c6be56fe49002fbdc23b95a58a60f69e56f6f87f424_u256;
-    EXPECT_EQ(addmod(a, b, mod), 0x7533da49e8c499530049fbf08733976b_u128);
+    for (auto&& impl : addmod_impls)  // the same vectors are run through every implementation
+    {
+        const auto x = 0xab0f4afc4c78548d4c30e1ab3449e3_u128;
+        const auto y = 0xf0a4485af15508e448cdddb0d1301664_u128;
+        const auto mod = 0xf0f9d0006f7b450e8f73f621a6ca3b56_u128;
+        EXPECT_EQ(impl(x, y, mod), 0x5587a57e263c2a46a61870d59a24f1_u128);
+        const auto a = 0xdce049946eccbbf77ed1e8e2a3c89e15a8e897df2194150700f5096dea864cdb_u256;
+        const auto b = 0x397dd0df188eaffbf5216c6be56fe49002fbdc23b95a58a60f69e56f6f87f424_u256;
+        EXPECT_EQ(impl(a, b, mod), 0x7533da49e8c499530049fbf08733976b_u128);
+    }
+}
+
+TEST(uint256, addmod_ec1)
+{
+    const auto x = 0x3bc8be7c7deebfbf00000000020000000100_u256;
+    const auto y = 0x100000000000000000000000000000000000001000000000000_u256;
+    const auto mod = 0x10000000000000000000000000000000000002b000000000000_u256;
+    for (auto&& impl : addmod_impls)  // every implementation must return the same value
+        EXPECT_EQ(impl(x, y, mod), 0x3bc8be7c7deebfbeffffffd6020000000100_u256);
+}
+
+TEST(uint256, addmod_ec2)
+{
+    const auto x = 0xffffffffffffffffffffffffffff000004020041fffffffffc00000060000020_u256;
+    const auto y = 0xffffffffffffffffffffffffffffffe6000000ffffffe60000febebeffffffff_u256;
+    const auto mod = 0xffffffffffffffffffe6000000ffffffe60000febebeffffffffffffffffffff_u256;
+    for (auto&& impl : addmod_impls)  // every implementation must return the same value
+        EXPECT_EQ(impl(x, y, mod), 0x33fffffdfeffe63801ff448281e5fffcfebebf60000021_u256);
 }
 
 TEST(uint256, mulmod)