chfast · chfast · Dec 17, 2021 · Aug 26, 2021 · Dec 17, 2021 · axic
diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
@@ -1943,6 +1943,24 @@ inline constexpr uint<N>& operator>>=(uint<N>& x, const T& y) noexcept
 
 inline uint256 addmod(const uint256& x, const uint256& y, const uint256& mod) noexcept
 {
+    // Fast path for mod >= 2^192, with x and y at most slightly bigger than mod.
+    // This is always the case when x and y are already reduced modulo mod.
+    // Based on https://github.com/holiman/uint256/pull/86.
+    if ((mod[3] != 0) && (x[3] <= mod[3]) && (y[3] <= mod[3]))
+    {
+        auto s = sub_with_carry(x, mod);
+        if (s.carry)
+            s.value = x;
+
+        auto t = sub_with_carry(y, mod);
+        if (t.carry)
+            t.value = y;
+
+        s = add_with_carry(s.value, t.value);
+        t = sub_with_carry(s.value, mod);
+        return (s.carry || !t.carry) ? t.value : s.value;
+    }
+
     const auto s = add_with_carry(x, y);
     uint<256 + 64> n = s.value;
     n[4] = s.carry;

diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
 # intx: extended precision integer library.
-# Copyright 2019-2020 Pawel Bylica.
+# Copyright 2019 Pawel Bylica.
 # Licensed under the Apache License, Version 2.0.
 
 hunter_add_package(benchmark)
@@ -8,6 +8,7 @@ find_package(benchmark CONFIG REQUIRED)
 find_package(GMP REQUIRED)
 
 add_executable(intx-bench
+    ../experimental/addmod.hpp
     bench_builtins.cpp
     bench_div.cpp
     bench_int128.cpp

diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp
@@ -1,9 +1,9 @@
 // intx: extended precision integer library.
-// Copyright 2019-2020 Pawel Bylica.
+// Copyright 2019 Pawel Bylica.
 // Licensed under the Apache License, Version 2.0.
 
+#include "../experimental/addmod.hpp"
 #include <benchmark/benchmark.h>
-#include <experimental/add.hpp>
 #include <intx/intx.hpp>
 #include <test/utils/gmp.hpp>
 #include <test/utils/random.hpp>
@@ -89,8 +89,14 @@ static void mod(benchmark::State& state)
         }
     }
 }
-BENCHMARK_TEMPLATE(mod, addmod)->DenseRange(64, 256, 64);
-BENCHMARK_TEMPLATE(mod, mulmod)->DenseRange(64, 256, 64);
+#define ARGS DenseRange(64, 256, 64)
+BENCHMARK_TEMPLATE(mod, addmod)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_public)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_simple)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_prenormalize)->ARGS;
+BENCHMARK_TEMPLATE(mod, addmod_daosvik)->ARGS;
+BENCHMARK_TEMPLATE(mod, mulmod)->ARGS;
+#undef ARGS
 
 template <uint256 ModFn(const uint256&, const uint256&, const uint256&)>
 static void ecmod(benchmark::State& state)
@@ -109,7 +115,10 @@ static void ecmod(benchmark::State& state)
         }
     }
 }
-BENCHMARK_TEMPLATE(ecmod, addmod);
+BENCHMARK_TEMPLATE(ecmod, addmod_public);
+BENCHMARK_TEMPLATE(ecmod, addmod_simple);
+BENCHMARK_TEMPLATE(ecmod, addmod_prenormalize);
+BENCHMARK_TEMPLATE(ecmod, addmod_daosvik);
 BENCHMARK_TEMPLATE(ecmod, mulmod);
 
 

diff --git a/test/experimental/addmod.hpp b/test/experimental/addmod.hpp
@@ -0,0 +1,76 @@
+// intx: extended precision integer library.
+// Copyright 2021 Pawel Bylica.
+// Licensed under the Apache License, Version 2.0.
+#pragma once
+
+#include <intx/intx.hpp>
+
+namespace intx::test
+{
+/// Thin, never-inlined wrapper that forwards to addmod() unchanged.
+[[maybe_unused, gnu::noinline]] static uint256 addmod_public(
+    const uint256& x, const uint256& y, const uint256& mod) noexcept
+{
+    return addmod(x, y, mod);
+}
+
+/// Computes (x + y) % mod by widening the sum to 320 bits (the carry becomes word 4)
+/// and taking the remainder of a single udivrem() call.
+[[maybe_unused, gnu::noinline]] static uint256 addmod_simple(
+    const uint256& x, const uint256& y, const uint256& mod) noexcept
+{
+    const auto sum = add_with_carry(x, y);
+
+    // Place the carry-out above the 256-bit sum so nothing is lost before dividing.
+    uint<256 + 64> wide = sum.value;
+    wide[4] = sum.carry;
+
+    const auto division = udivrem(wide, mod);
+    return division.rem;
+}
+
+/// Computes (x + y) % mod by first reducing each operand that is not already below mod,
+/// so the final correction needs at most one subtraction of mod.
+[[maybe_unused, gnu::noinline]] static uint256 addmod_prenormalize(
+    const uint256& x, const uint256& y, const uint256& mod) noexcept
+{
+    const auto x_reduced = (x < mod) ? x : x % mod;
+    const auto y_reduced = (y < mod) ? y : y % mod;
+
+    const auto sum = add_with_carry(x_reduced, y_reduced);
+
+    // A carry-out or a value >= mod means the true sum is in [mod, 2 * mod).
+    const bool exceeds_mod = sum.carry || sum.value >= mod;
+    return exceeds_mod ? sum.value - mod : sum.value;
+}
+
+/// Computes (x + y) % m with a division-free fast path, falling back to the widened
+/// udivrem() approach otherwise.
+///
+/// Fast path for m >= 2^192, with x and y at most slightly bigger than m.
+/// This is always the case when x and y are already reduced modulo m.
+/// Based on https://github.com/holiman/uint256/pull/86.
+[[maybe_unused, gnu::noinline]] static uint256 addmod_daosvik(
+    const uint256& x, const uint256& y, const uint256& m) noexcept
+{
+    const bool fast_path = (m[3] != 0) && (x[3] <= m[3]) && (y[3] <= m[3]);
+    if (!fast_path)
+    {
+        const auto total = add_with_carry(x, y);
+        uint<256 + 64> wide = total.value;
+        wide[4] = total.carry;
+        return udivrem(wide, m).rem;
+    }
+
+    // Subtract m once from each operand; on a carry (borrow) keep the operand as it was.
+    const auto x_minus_m = sub_with_carry(x, m);
+    const auto x_reduced = x_minus_m.carry ? x : x_minus_m.value;
+    const auto y_minus_m = sub_with_carry(y, m);
+    const auto y_reduced = y_minus_m.carry ? y : y_minus_m.value;
+
+    // Take sum - m when the addition overflowed or the subtraction did not borrow.
+    const auto sum = add_with_carry(x_reduced, y_reduced);
+    const auto sum_minus_m = sub_with_carry(sum.value, m);
+    return (sum.carry || !sum_minus_m.carry) ? sum_minus_m.value : sum.value;
+}
+}  // namespace intx::test
diff --git a/test/fuzzer/opmod_fuzz.cpp b/test/fuzzer/opmod_fuzz.cpp
@@ -2,6 +2,7 @@
 // Copyright 2020 Pawel Bylica.
 // Licensed under the Apache License, Version 2.0.
 
+#include "../experimental/addmod.hpp"
 #include "../utils/gmp.hpp"
 #include <intx/intx.hpp>
 #include <cstring>
@@ -11,7 +12,7 @@ constexpr size_t input_size = 3 * sizeof(intx::uint256);
 
 std::ostream& operator<<(std::ostream& os, const intx::uint256& x)
 {
-    return os << to_string(x, 16);
+    return os << "0x" << to_string(x, 16);  // explicit "0x" prefix ahead of the to_string(x, 16) text
 }
 
 extern "C" size_t LLVMFuzzerMutate(uint8_t* data, size_t size, size_t max_size);
@@ -26,6 +27,14 @@ extern "C" size_t LLVMFuzzerCustomMutator(
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size) noexcept
 {
+    static constexpr decltype(&intx::addmod) addmod_fns[] = {
+        intx::addmod,
+        intx::test::addmod_public,
+        intx::test::addmod_simple,
+        intx::test::addmod_prenormalize,
+        intx::test::addmod_daosvik,
+    };
+
     if (data_size < input_size)
         return 0;
 
@@ -37,14 +46,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size) noe
     if (m == 0)
         return 0;
 
-    const auto r = intx::addmod(a, b, m);
-    const auto e = intx::gmp::addmod(a, b, m);
-
-    if (INTX_UNLIKELY(r != e))
+    const auto expected = intx::gmp::addmod(a, b, m);
+    for (size_t i = 0; i < std::size(addmod_fns); ++i)
     {
-        std::cerr << "FAILED:\n  " << a << " + " << b << " mod " << m << "\n  result:   " << r
-                  << "\n  expected: " << e << "\n";
-        __builtin_trap();
+        const auto result = addmod_fns[i](a, b, m);
+
+        if (INTX_UNLIKELY(result != expected))
+        {
+            std::cerr << "FAILED: [" << i << "]\n  " << a << " + " << b << " mod " << m
+                      << "\n  result:   " << result << "\n  expected: " << expected << "\n";
+            __builtin_trap();
+        }
     }
 
     return 0;

diff --git a/test/unittests/test_intx.cpp b/test/unittests/test_intx.cpp
@@ -2,6 +2,7 @@
 // Copyright 2019 Pawel Bylica.
 // Licensed under the Apache License, Version 2.0.
 
+#include "../experimental/addmod.hpp"
 #include "test_cases.hpp"
 #include "test_suite.hpp"
 
@@ -164,15 +165,44 @@ TEST(uint256, exp)
         83674153047243082998136072363356897816464308069321161820168341056719375264851_u256);
 }
 
+static constexpr decltype(&addmod) addmod_impls[] = {  // addmod variants run by the tests below
+    addmod,
+    test::addmod_public,
+    test::addmod_simple,
+    test::addmod_prenormalize,
+    test::addmod_daosvik,
+};
+
 TEST(uint256, addmod)
 {
-    const auto x = 0xab0f4afc4c78548d4c30e1ab3449e3_u128;
-    const auto y = 0xf0a4485af15508e448cdddb0d1301664_u128;
-    const auto mod = 0xf0f9d0006f7b450e8f73f621a6ca3b56_u128;
-    EXPECT_EQ(addmod(x, y, mod), 0x5587a57e263c2a46a61870d59a24f1_u128);
-    const auto a = 0xdce049946eccbbf77ed1e8e2a3c89e15a8e897df2194150700f5096dea864cdb_u256;
-    const auto b = 0x397dd0df188eaffbf5216c6be56fe49002fbdc23b95a58a60f69e56f6f87f424_u256;
-    EXPECT_EQ(addmod(a, b, mod), 0x7533da49e8c499530049fbf08733976b_u128);
+    const auto modulus = 0xf0f9d0006f7b450e8f73f621a6ca3b56_u128;
+    const auto x_narrow = 0xab0f4afc4c78548d4c30e1ab3449e3_u128;
+    const auto y_narrow = 0xf0a4485af15508e448cdddb0d1301664_u128;
+    const auto x_wide = 0xdce049946eccbbf77ed1e8e2a3c89e15a8e897df2194150700f5096dea864cdb_u256;
+    const auto y_wide = 0x397dd0df188eaffbf5216c6be56fe49002fbdc23b95a58a60f69e56f6f87f424_u256;
+    for (const auto& fn : addmod_impls)  // each variant is checked on identical inputs
+    {
+        EXPECT_EQ(fn(x_narrow, y_narrow, modulus), 0x5587a57e263c2a46a61870d59a24f1_u128);
+        EXPECT_EQ(fn(x_wide, y_wide, modulus), 0x7533da49e8c499530049fbf08733976b_u128);
+    }
+}
+
+TEST(uint256, addmod_ec1)
+{
+    const auto lhs = 0x3bc8be7c7deebfbf00000000020000000100_u256;
+    const auto rhs = 0x100000000000000000000000000000000000001000000000000_u256;
+    const auto modulus = 0x10000000000000000000000000000000000002b000000000000_u256;
+    for (const auto& fn : addmod_impls)  // every variant must return the same value
+        EXPECT_EQ(fn(lhs, rhs, modulus), 0x3bc8be7c7deebfbeffffffd6020000000100_u256);
+}
+
+TEST(uint256, addmod_ec2)
+{
+    const auto lhs = 0xffffffffffffffffffffffffffff000004020041fffffffffc00000060000020_u256;
+    const auto rhs = 0xffffffffffffffffffffffffffffffe6000000ffffffe60000febebeffffffff_u256;
+    const auto modulus = 0xffffffffffffffffffe6000000ffffffe60000febebeffffffffffffffffffff_u256;
+    for (const auto& fn : addmod_impls)  // every variant must return the same value
+        EXPECT_EQ(fn(lhs, rhs, modulus), 0x33fffffdfeffe63801ff448281e5fffcfebebf60000021_u256);
 }
 
 TEST(uint256, mulmod)