diff --git a/src/evaluate.cpp b/src/evaluate.cpp
index dcbfedb499a..9b2af02d2ea 100644
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -25,12 +25,14 @@
 #include <iomanip>
 #include <iostream>
 #include <sstream>
+#include <memory>
 
 #include "nnue/network.h"
 #include "nnue/nnue_misc.h"
 #include "position.h"
 #include "types.h"
 #include "uci.h"
+#include "nnue/nnue_accumulator.h"
 
 namespace Stockfish {
 
@@ -45,7 +47,10 @@ int Eval::simple_eval(const Position& pos, Color c) {
 
 // Evaluate is the evaluator for the outer world. It returns a static evaluation
 // of the position from the point of view of the side to move.
-Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos, int optimism) {
+Value Eval::evaluate(const Eval::NNUE::Networks&    networks,
+                     const Position&                pos,
+                     Eval::NNUE::AccumulatorCaches& caches,
+                     int                            optimism) {
 
     assert(!pos.checkers());
 
@@ -55,8 +60,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
     int  nnueComplexity;
     int  v;
 
-    Value nnue = smallNet ? networks.small.evaluate(pos, true, &nnueComplexity, psqtOnly)
-                          : networks.big.evaluate(pos, true, &nnueComplexity, false);
+    Value nnue = smallNet ? networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly)
+                          : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);
 
     const auto adjustEval = [&](int optDiv, int nnueDiv, int pawnCountConstant, int pawnCountMul,
                                 int npmConstant, int evalDiv, int shufflingConstant,
@@ -94,20 +99,22 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
 // Trace scores are from white's point of view
 std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
 
+    auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>();
+
     if (pos.checkers())
         return "Final evaluation: none (in check)";
 
     std::stringstream ss;
     ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
-    ss << '\n' << NNUE::trace(pos, networks) << '\n';
+    ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
 
     ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
 
-    Value v = networks.big.evaluate(pos, false);
+    Value v = networks.big.evaluate(pos, &caches->big, false);
     v       = pos.side_to_move() == WHITE ? v : -v;
     ss << "NNUE evaluation        " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
 
-    v = evaluate(networks, pos, VALUE_ZERO);
+    v = evaluate(networks, pos, *caches, VALUE_ZERO);
     v = pos.side_to_move() == WHITE ? v : -v;
     ss << "Final evaluation       " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
     ss << " [with scaled NNUE, ...]";
diff --git a/src/evaluate.h b/src/evaluate.h
index da9c7074ec1..38615ff7d68 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -40,14 +40,16 @@ constexpr inline int SmallNetThreshold = 1274, PsqtOnlyThreshold = 2389;
 
 namespace NNUE {
 struct Networks;
+struct AccumulatorCaches;
 }
 
 std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
 
 int   simple_eval(const Position& pos, Color c);
-Value evaluate(const NNUE::Networks& networks, const Position& pos, int optimism);
-
-
+Value evaluate(const NNUE::Networks&          networks,
+               const Position&                pos,
+               Eval::NNUE::AccumulatorCaches& caches,
+               int                            optimism);
 }  // namespace Eval
 
 }  // namespace Stockfish
diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp
index 5789db4844a..71782a7b731 100644
--- a/src/nnue/features/half_ka_v2_hm.cpp
+++ b/src/nnue/features/half_ka_v2_hm.cpp
@@ -23,7 +23,7 @@
 #include "../../bitboard.h"
 #include "../../position.h"
 #include "../../types.h"
-#include "../nnue_common.h"
+#include "../nnue_accumulator.h"
 
 namespace Stockfish::Eval::NNUE::Features {
 
@@ -49,6 +49,8 @@ void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active)
 // Explicit template instantiations
 template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
 template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
+template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
+template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);
 
 // Get a list of indices for recently changed features
 template<Color Perspective>
diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h
index 8363184f430..96349704745 100644
--- a/src/nnue/features/half_ka_v2_hm.h
+++ b/src/nnue/features/half_ka_v2_hm.h
@@ -63,10 +63,6 @@ class HalfKAv2_hm {
       {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
        PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
 
-    // Index of a feature for a given king position and another piece on some square
-    template<Color Perspective>
-    static IndexType make_index(Square s, Piece pc, Square ksq);
-
    public:
     // Feature name
     static constexpr const char* Name = "HalfKAv2_hm(Friend)";
@@ -126,6 +122,10 @@ class HalfKAv2_hm {
     static constexpr IndexType MaxActiveDimensions = 32;
     using IndexList                                = ValueList<IndexType, MaxActiveDimensions>;
 
+    // Index of a feature for a given king position and another piece on some square
+    template<Color Perspective>
+    static IndexType make_index(Square s, Piece pc, Square ksq);
+
     // Get a list of indices for active features
     template<Color Perspective>
     static void append_active_indices(const Position& pos, IndexList& active);
diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp
index bea3e7cb398..656ad97a1e3 100644
--- a/src/nnue/network.cpp
+++ b/src/nnue/network.cpp
@@ -186,10 +186,11 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
 
 
 template<typename Arch, typename Transformer>
-Value Network<Arch, Transformer>::evaluate(const Position& pos,
-                                           bool            adjusted,
-                                           int*            complexity,
-                                           bool            psqtOnly) const {
+Value Network<Arch, Transformer>::evaluate(const Position&                         pos,
+                                           AccumulatorCaches::Cache<FTDimensions>* cache,
+                                           bool                                    adjusted,
+                                           int*                                    complexity,
+                                           bool                                    psqtOnly) const {
     // We manually align the arrays on the stack because with gcc < 9.3
     // overaligning stack variables with alignas() doesn't work correctly.
 
@@ -197,20 +198,21 @@ Value Network<Arch, Transformer>::evaluate(const Position& pos,
     constexpr int      delta     = 24;
 
 #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
-    TransformedFeatureType transformedFeaturesUnaligned
-      [FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
-       + alignment / sizeof(TransformedFeatureType)];
+    TransformedFeatureType
+      transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
+                                   + alignment / sizeof(TransformedFeatureType)];
 
     auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
 #else
-    alignas(alignment) TransformedFeatureType transformedFeatures
-      [FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
+    alignas(alignment) TransformedFeatureType
+      transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
 #endif
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
 
     const int  bucket = (pos.count<ALL_PIECES>() - 1) / 4;
-    const auto psqt   = featureTransformer->transform(pos, transformedFeatures, bucket, psqtOnly);
+    const auto psqt =
+      featureTransformer->transform(pos, cache, transformedFeatures, bucket, psqtOnly);
     const auto positional = !psqtOnly ? (network[bucket]->propagate(transformedFeatures)) : 0;
 
     if (complexity)
@@ -255,26 +257,29 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {
 
 
 template<typename Arch, typename Transformer>
-void Network<Arch, Transformer>::hint_common_access(const Position& pos, bool psqtOnl) const {
-    featureTransformer->hint_common_access(pos, psqtOnl);
+void Network<Arch, Transformer>::hint_common_access(const Position&                         pos,
+                                                    AccumulatorCaches::Cache<FTDimensions>* cache,
+                                                    bool psqtOnl) const {
+    featureTransformer->hint_common_access(pos, cache, psqtOnl);
 }
 
-
 template<typename Arch, typename Transformer>
-NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) const {
+NnueEvalTrace
+Network<Arch, Transformer>::trace_evaluate(const Position&                         pos,
+                                           AccumulatorCaches::Cache<FTDimensions>* cache) const {
     // We manually align the arrays on the stack because with gcc < 9.3
     // overaligning stack variables with alignas() doesn't work correctly.
     constexpr uint64_t alignment = CacheLineSize;
 
 #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
-    TransformedFeatureType transformedFeaturesUnaligned
-      [FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
-       + alignment / sizeof(TransformedFeatureType)];
+    TransformedFeatureType
+      transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
+                                   + alignment / sizeof(TransformedFeatureType)];
 
     auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
 #else
-    alignas(alignment) TransformedFeatureType transformedFeatures
-      [FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
+    alignas(alignment) TransformedFeatureType
+      transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
 #endif
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
@@ -284,7 +289,7 @@ NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) co
     for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
     {
         const auto materialist =
-          featureTransformer->transform(pos, transformedFeatures, bucket, false);
+          featureTransformer->transform(pos, cache, transformedFeatures, bucket, false);
         const auto positional = network[bucket]->propagate(transformedFeatures);
 
         t.psqt[bucket]       = static_cast<Value>(materialist / OutputScale);
diff --git a/src/nnue/network.h b/src/nnue/network.h
index 21e1c622205..df59732d955 100644
--- a/src/nnue/network.h
+++ b/src/nnue/network.h
@@ -31,10 +31,10 @@
 #include "nnue_architecture.h"
 #include "nnue_feature_transformer.h"
 #include "nnue_misc.h"
+#include "nnue_accumulator.h"
 
 namespace Stockfish::Eval::NNUE {
 
-
 enum class EmbeddedNNUEType {
     BIG,
     SMALL,
@@ -43,6 +43,8 @@ enum class EmbeddedNNUEType {
 
 template<typename Arch, typename Transformer>
 class Network {
+    static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
+
    public:
     Network(EvalFile file, EmbeddedNNUEType type) :
         evalFile(file),
@@ -51,17 +53,20 @@ class Network {
     void load(const std::string& rootDirectory, std::string evalfilePath);
     bool save(const std::optional<std::string>& filename) const;
 
+    Value evaluate(const Position&                         pos,
+                   AccumulatorCaches::Cache<FTDimensions>* cache,
+                   bool                                    adjusted   = false,
+                   int*                                    complexity = nullptr,
+                   bool                                    psqtOnly   = false) const;
 
-    Value evaluate(const Position& pos,
-                   bool            adjusted   = false,
-                   int*            complexity = nullptr,
-                   bool            psqtOnly   = false) const;
 
-
-    void hint_common_access(const Position& pos, bool psqtOnl) const;
+    void hint_common_access(const Position&                         pos,
+                            AccumulatorCaches::Cache<FTDimensions>* cache,
+                            bool                                    psqtOnl) const;
 
     void          verify(std::string evalfilePath) const;
-    NnueEvalTrace trace_evaluate(const Position& pos) const;
+    NnueEvalTrace trace_evaluate(const Position&                         pos,
+                                 AccumulatorCaches::Cache<FTDimensions>* cache) const;
 
    private:
     void load_user_net(const std::string&, const std::string&);
@@ -89,6 +94,9 @@ class Network {
 
     // Hash value of evaluation function structure
     static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
+
+    template<IndexType Size>
+    friend struct AccumulatorCaches::Cache;
 };
 
 // Definitions of the network types
diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h
index c0746b4ee86..8d73dbef5ad 100644
--- a/src/nnue/nnue_accumulator.h
+++ b/src/nnue/nnue_accumulator.h
@@ -28,13 +28,75 @@
 
 namespace Stockfish::Eval::NNUE {
 
+using BiasType       = std::int16_t;
+using PSQTWeightType = std::int32_t;
+using IndexType      = std::uint32_t;
+
 // Class that holds the result of affine transformation of input features
 template<IndexType Size>
 struct alignas(CacheLineSize) Accumulator {
-    std::int16_t accumulation[2][Size];
-    std::int32_t psqtAccumulation[2][PSQTBuckets];
-    bool         computed[2];
-    bool         computedPSQT[2];
+    std::int16_t accumulation[COLOR_NB][Size];
+    std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
+    bool         computed[COLOR_NB];
+    bool         computedPSQT[COLOR_NB];
+};
+
+
+// AccumulatorCaches struct provides per-thread accumulator caches, where each
+// cache contains multiple entries for each of the possible king squares.
+// When the accumulator needs to be refreshed, the cached entry is used to more
+// efficiently update the accumulator, instead of rebuilding it from scratch.
+// This idea, was first described by Luecx (author of Koivisto) and
+// is commonly referred to as "Finny Tables".
+struct AccumulatorCaches {
+
+    template<IndexType Size>
+    struct alignas(CacheLineSize) Cache {
+
+        struct alignas(CacheLineSize) Entry {
+            BiasType       accumulation[COLOR_NB][Size];
+            PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
+            Bitboard       byColorBB[COLOR_NB][COLOR_NB];
+            Bitboard       byTypeBB[COLOR_NB][PIECE_TYPE_NB];
+
+            // To initialize a refresh entry, we set all its bitboards empty,
+            // so we put the biases in the accumulation, without any weights on top
+            void clear(const BiasType* biases) {
+
+                std::memset(byColorBB, 0, sizeof(byColorBB));
+                std::memset(byTypeBB, 0, sizeof(byTypeBB));
+
+                std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
+                std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
+
+                std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation));
+            }
+        };
+
+        template<typename Network>
+        void clear(const Network& network) {
+            for (auto& entry : entries)
+                entry.clear(network.featureTransformer->biases);
+        }
+
+        void clear(const BiasType* biases) {
+            for (auto& entry : entries)
+                entry.clear(biases);
+        }
+
+        Entry& operator[](Square sq) { return entries[sq]; }
+
+        std::array<Entry, SQUARE_NB> entries;
+    };
+
+    template<typename Networks>
+    void clear(const Networks& networks) {
+        big.clear(networks.big);
+    }
+
+    // When adding a new cache for a network, i.e. the smallnet
+    // the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh.
+    Cache<TransformedFeatureDimensionsBig> big;
 };
 
 }  // namespace Stockfish::Eval::NNUE
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
index 0a0f4217fdb..88f0e4031a4 100644
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -195,10 +195,10 @@ template<IndexType                                 TransformedFeatureDimensions,
          Accumulator<TransformedFeatureDimensions> StateInfo::*accPtr>
 class FeatureTransformer {
 
-   private:
     // Number of output dimensions for one side
     static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
 
+   private:
 #ifdef VECTOR
     static constexpr int NumRegs =
       BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
@@ -306,10 +306,13 @@ class FeatureTransformer {
     }
 
     // Convert input features
-    std::int32_t
-    transform(const Position& pos, OutputType* output, int bucket, bool psqtOnly) const {
-        update_accumulator<WHITE>(pos, psqtOnly);
-        update_accumulator<BLACK>(pos, psqtOnly);
+    std::int32_t transform(const Position&                           pos,
+                           AccumulatorCaches::Cache<HalfDimensions>* cache,
+                           OutputType*                               output,
+                           int                                       bucket,
+                           bool                                      psqtOnly) const {
+        update_accumulator<WHITE>(pos, cache, psqtOnly);
+        update_accumulator<BLACK>(pos, cache, psqtOnly);
 
         const Color perspectives[2]  = {pos.side_to_move(), ~pos.side_to_move()};
         const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation;
@@ -371,9 +374,11 @@ class FeatureTransformer {
         return psqt;
     }  // end of function transform()
 
-    void hint_common_access(const Position& pos, bool psqtOnly) const {
-        hint_common_access_for_perspective<WHITE>(pos, psqtOnly);
-        hint_common_access_for_perspective<BLACK>(pos, psqtOnly);
+    void hint_common_access(const Position&                           pos,
+                            AccumulatorCaches::Cache<HalfDimensions>* cache,
+                            bool                                      psqtOnly) const {
+        hint_common_access_for_perspective<WHITE>(pos, cache, psqtOnly);
+        hint_common_access_for_perspective<BLACK>(pos, cache, psqtOnly);
     }
 
    private:
@@ -650,7 +655,161 @@ class FeatureTransformer {
     }
 
     template<Color Perspective>
-    void update_accumulator_refresh(const Position& pos, bool psqtOnly) const {
+    void update_accumulator_refresh_cache(const Position&                           pos,
+                                          AccumulatorCaches::Cache<HalfDimensions>* cache) const {
+        assert(cache != nullptr);
+
+        Square ksq = pos.square<KING>(Perspective);
+
+        auto& entry = (*cache)[ksq];
+
+        auto& accumulator                     = pos.state()->*accPtr;
+        accumulator.computed[Perspective]     = true;
+        accumulator.computedPSQT[Perspective] = true;
+
+        FeatureSet::IndexList removed, added;
+        for (Color c : {WHITE, BLACK})
+        {
+            for (PieceType pt = PAWN; pt <= KING; ++pt)
+            {
+                const Piece    piece = make_piece(c, pt);
+                const Bitboard oldBB =
+                  entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt];
+                const Bitboard newBB    = pos.pieces(c, pt);
+                Bitboard       toRemove = oldBB & ~newBB;
+                Bitboard       toAdd    = newBB & ~oldBB;
+
+                while (toRemove)
+                {
+                    Square sq = pop_lsb(toRemove);
+                    removed.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
+                }
+                while (toAdd)
+                {
+                    Square sq = pop_lsb(toAdd);
+                    added.push_back(FeatureSet::make_index<Perspective>(sq, piece, ksq));
+                }
+            }
+        }
+
+#ifdef VECTOR
+        vec_t      acc[NumRegs];
+        psqt_vec_t psqt[NumPsqtRegs];
+
+        for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
+        {
+            auto entryTile =
+              reinterpret_cast<vec_t*>(&entry.accumulation[Perspective][j * TileHeight]);
+            for (IndexType k = 0; k < NumRegs; ++k)
+                acc[k] = entryTile[k];
+
+            for (int i = 0; i < int(added.size()); ++i)
+            {
+                IndexType       index  = added[i];
+                const IndexType offset = HalfDimensions * index + j * TileHeight;
+                auto            column = reinterpret_cast<const vec_t*>(&weights[offset]);
+
+                for (unsigned k = 0; k < NumRegs; ++k)
+                    acc[k] = vec_add_16(acc[k], column[k]);
+            }
+            for (int i = 0; i < int(removed.size()); ++i)
+            {
+                IndexType       index  = removed[i];
+                const IndexType offset = HalfDimensions * index + j * TileHeight;
+                auto            column = reinterpret_cast<const vec_t*>(&weights[offset]);
+
+                for (unsigned k = 0; k < NumRegs; ++k)
+                    acc[k] = vec_sub_16(acc[k], column[k]);
+            }
+
+            for (IndexType k = 0; k < NumRegs; k++)
+                vec_store(&entryTile[k], acc[k]);
+        }
+
+        for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
+        {
+            auto entryTilePsqt = reinterpret_cast<psqt_vec_t*>(
+              &entry.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+            for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+                psqt[k] = entryTilePsqt[k];
+
+            for (int i = 0; i < int(added.size()); ++i)
+            {
+                IndexType       index  = added[i];
+                const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+                auto columnPsqt        = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+
+                for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+                    psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
+            }
+            for (int i = 0; i < int(removed.size()); ++i)
+            {
+                IndexType       index  = removed[i];
+                const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
+                auto columnPsqt        = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);
+
+                for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+                    psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
+            }
+
+            for (std::size_t k = 0; k < NumPsqtRegs; ++k)
+                vec_store_psqt(&entryTilePsqt[k], psqt[k]);
+        }
+
+#else
+
+        for (const auto index : added)
+        {
+            const IndexType offset = HalfDimensions * index;
+            for (IndexType j = 0; j < HalfDimensions; ++j)
+                entry.accumulation[Perspective][j] += weights[offset + j];
+
+            for (std::size_t k = 0; k < PSQTBuckets; ++k)
+                entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k];
+        }
+        for (const auto index : removed)
+        {
+            const IndexType offset = HalfDimensions * index;
+            for (IndexType j = 0; j < HalfDimensions; ++j)
+                entry.accumulation[Perspective][j] -= weights[offset + j];
+
+            for (std::size_t k = 0; k < PSQTBuckets; ++k)
+                entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k];
+        }
+
+#endif
+
+        // The accumulator of the refresh entry has been updated.
+        // Now copy its content to the actual accumulator we were refreshing
+
+        std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective],
+                    sizeof(int32_t) * PSQTBuckets);
+
+        std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective],
+                    sizeof(BiasType) * HalfDimensions);
+
+        for (Color c : {WHITE, BLACK})
+            entry.byColorBB[Perspective][c] = pos.pieces(c);
+
+        for (PieceType pt = PAWN; pt <= KING; ++pt)
+            entry.byTypeBB[Perspective][pt] = pos.pieces(pt);
+    }
+
+    template<Color Perspective>
+    void
+    update_accumulator_refresh(const Position&                                            pos,
+                               [[maybe_unused]] AccumulatorCaches::Cache<HalfDimensions>* cache,
+                               bool psqtOnly) const {
+
+        // When we are refreshing the accumulator of the big net,
+        // redirect to the version of refresh that uses the refresh table.
+        // Using the cache for the small net is not beneficial.
+        if constexpr (HalfDimensions == Eval::NNUE::TransformedFeatureDimensionsBig)
+        {
+            update_accumulator_refresh_cache<Perspective>(pos, cache);
+            return;
+        }
+
 #ifdef VECTOR
         // Gcc-10.2 unnecessarily spills AVX2 registers if this array
         // is defined in the VECTOR code below, once in each branch
@@ -764,7 +923,9 @@ class FeatureTransformer {
     }
 
     template<Color Perspective>
-    void hint_common_access_for_perspective(const Position& pos, bool psqtOnly) const {
+    void hint_common_access_for_perspective(const Position&                           pos,
+                                            AccumulatorCaches::Cache<HalfDimensions>* cache,
+                                            bool psqtOnly) const {
 
         // Works like update_accumulator, but performs less work.
         // Updates ONLY the accumulator for pos.
@@ -787,11 +948,13 @@ class FeatureTransformer {
                                                            psqtOnly);
         }
         else
-            update_accumulator_refresh<Perspective>(pos, psqtOnly);
+            update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
     }
 
     template<Color Perspective>
-    void update_accumulator(const Position& pos, bool psqtOnly) const {
+    void update_accumulator(const Position&                           pos,
+                            AccumulatorCaches::Cache<HalfDimensions>* cache,
+                            bool                                      psqtOnly) const {
 
         auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos, psqtOnly);
 
@@ -813,9 +976,12 @@ class FeatureTransformer {
                                                            psqtOnly);
         }
         else
-            update_accumulator_refresh<Perspective>(pos, psqtOnly);
+            update_accumulator_refresh<Perspective>(pos, cache, psqtOnly);
     }
 
+    template<IndexType Size>
+    friend struct AccumulatorCaches::Cache;
+
     alignas(CacheLineSize) BiasType biases[HalfDimensions];
     alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
     alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets];
diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp
index 3fa6e1b6180..51838fefa44 100644
--- a/src/nnue/nnue_misc.cpp
+++ b/src/nnue/nnue_misc.cpp
@@ -42,13 +42,15 @@ namespace Stockfish::Eval::NNUE {
 constexpr std::string_view PieceToChar(" PNBRQK  pnbrqk");
 
 
-void hint_common_parent_position(const Position& pos, const Networks& networks) {
+void hint_common_parent_position(const Position&    pos,
+                                 const Networks&    networks,
+                                 AccumulatorCaches& caches) {
 
     int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
     if (simpleEvalAbs > Eval::SmallNetThreshold)
-        networks.small.hint_common_access(pos, simpleEvalAbs > Eval::PsqtOnlyThreshold);
+        networks.small.hint_common_access(pos, nullptr, simpleEvalAbs > Eval::PsqtOnlyThreshold);
     else
-        networks.big.hint_common_access(pos, false);
+        networks.big.hint_common_access(pos, &caches.big, false);
 }
 
 namespace {
@@ -104,7 +106,8 @@ void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& p
 
 // Returns a string with the value of each piece on a board,
 // and a table for (PSQT, Layers) values bucket by bucket.
-std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
+std::string
+trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) {
 
     std::stringstream ss;
 
@@ -130,7 +133,7 @@ std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
 
     // We estimate the value of each piece by doing a differential evaluation from
     // the current base eval, simulating the removal of the piece from its square.
-    Value base = networks.big.evaluate(pos);
+    Value base = networks.big.evaluate(pos, &caches.big);
     base       = pos.side_to_move() == WHITE ? base : -base;
 
     for (File f = FILE_A; f <= FILE_H; ++f)
@@ -149,7 +152,7 @@ std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
                   st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
                     false;
 
-                Value eval = networks.big.evaluate(pos);
+                Value eval = networks.big.evaluate(pos, &caches.big);
                 eval       = pos.side_to_move() == WHITE ? eval : -eval;
                 v          = base - eval;
 
@@ -167,7 +170,7 @@ std::string trace(Position& pos, const Eval::NNUE::Networks& networks) {
         ss << board[row] << '\n';
     ss << '\n';
 
-    auto t = networks.big.trace_evaluate(pos);
+    auto t = networks.big.trace_evaluate(pos, &caches.big);
 
     ss << " NNUE network contributions "
        << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << std::endl
diff --git a/src/nnue/nnue_misc.h b/src/nnue/nnue_misc.h
index 5eab02184c6..27a93f88435 100644
--- a/src/nnue/nnue_misc.h
+++ b/src/nnue/nnue_misc.h
@@ -50,12 +50,13 @@ struct NnueEvalTrace {
     std::size_t correctBucket;
 };
 
-
 struct Networks;
+struct AccumulatorCaches;
 
-
-std::string trace(Position& pos, const Networks& networks);
-void        hint_common_parent_position(const Position& pos, const Networks& networks);
+std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches);
+void        hint_common_parent_position(const Position&    pos,
+                                        const Networks&    networks,
+                                        AccumulatorCaches& caches);
 
 }  // namespace Stockfish::Eval::NNUE
 }  // namespace Stockfish
diff --git a/src/search.cpp b/src/search.cpp
index 6813c1a5f61..7ff94128eb1 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -33,6 +33,8 @@
 #include "misc.h"
 #include "movegen.h"
 #include "movepick.h"
+#include "nnue/network.h"
+#include "nnue/nnue_accumulator.h"
 #include "nnue/nnue_common.h"
 #include "nnue/nnue_misc.h"
 #include "position.h"
@@ -135,6 +137,7 @@ Search::Worker::Worker(SharedState&                    sharedState,
     // Unpack the SharedState struct into member variables
     thread_idx(thread_id),
     manager(std::move(sm)),
+    refreshTable(),
     options(sharedState.options),
     threads(sharedState.threads),
     tt(sharedState.tt),
@@ -143,6 +146,10 @@ Search::Worker::Worker(SharedState&                    sharedState,
 }
 
 void Search::Worker::start_searching() {
+
+    // Initialize accumulator refresh entries
+    refreshTable.clear(networks);
+
     // Non-main threads go directly to iterative_deepening()
     if (!is_mainthread())
     {
@@ -564,7 +571,7 @@ Value Search::Worker::search(
         if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply)
             || ss->ply >= MAX_PLY)
             return (ss->ply >= MAX_PLY && !ss->inCheck)
-                   ? evaluate(networks, pos, thisThread->optimism[us])
+                   ? evaluate(networks, pos, refreshTable, thisThread->optimism[us])
                    : value_draw(thisThread->nodes);
 
         // Step 3. Mate distance pruning. Even if we mate at the next move our score
@@ -698,7 +705,7 @@ Value Search::Worker::search(
     {
         // Providing the hint that this node's accumulator will be used often
         // brings significant Elo gain (~13 Elo).
-        Eval::NNUE::hint_common_parent_position(pos, networks);
+        Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable);
         unadjustedStaticEval = eval = ss->staticEval;
     }
     else if (ss->ttHit)
@@ -706,9 +713,9 @@ Value Search::Worker::search(
         // Never assume anything about values stored in TT
         unadjustedStaticEval = tte->eval();
         if (unadjustedStaticEval == VALUE_NONE)
-            unadjustedStaticEval = evaluate(networks, pos, thisThread->optimism[us]);
+            unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]);
         else if (PvNode)
-            Eval::NNUE::hint_common_parent_position(pos, networks);
+            Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable);
 
         ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
 
@@ -718,7 +725,7 @@ Value Search::Worker::search(
     }
     else
     {
-        unadjustedStaticEval = evaluate(networks, pos, thisThread->optimism[us]);
+        unadjustedStaticEval = evaluate(networks, pos, refreshTable, thisThread->optimism[us]);
         ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
 
         // Static evaluation is saved as it was before adjustment by correction history
@@ -875,7 +882,7 @@ Value Search::Worker::search(
                 }
             }
 
-        Eval::NNUE::hint_common_parent_position(pos, networks);
+        Eval::NNUE::hint_common_parent_position(pos, networks, refreshTable);
     }
 
 moves_loop:  // When in check, search starts here
@@ -1413,7 +1420,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
     // Step 2. Check for an immediate draw or maximum ply reached
     if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY)
         return (ss->ply >= MAX_PLY && !ss->inCheck)
-               ? evaluate(networks, pos, thisThread->optimism[us])
+               ? evaluate(networks, pos, refreshTable, thisThread->optimism[us])
                : VALUE_DRAW;
 
     assert(0 <= ss->ply && ss->ply < MAX_PLY);
@@ -1445,7 +1452,8 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
             // Never assume anything about values stored in TT
             unadjustedStaticEval = tte->eval();
             if (unadjustedStaticEval == VALUE_NONE)
-                unadjustedStaticEval = evaluate(networks, pos, thisThread->optimism[us]);
+                unadjustedStaticEval =
+                  evaluate(networks, pos, refreshTable, thisThread->optimism[us]);
             ss->staticEval = bestValue =
               to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
 
@@ -1458,7 +1466,7 @@ Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta,
         {
             // In case of null move search, use previous static eval with a different sign
             unadjustedStaticEval = (ss - 1)->currentMove != Move::null()
-                                   ? evaluate(networks, pos, thisThread->optimism[us])
+                                   ? evaluate(networks, pos, refreshTable, thisThread->optimism[us])
                                    : -(ss - 1)->staticEval;
             ss->staticEval       = bestValue =
               to_corrected_static_eval(unadjustedStaticEval, *thisThread, pos);
diff --git a/src/search.h b/src/search.h
index 3ceaf5ddd0b..0fd778b47e6 100644
--- a/src/search.h
+++ b/src/search.h
@@ -26,9 +26,9 @@
 #include <cstdint>
 #include <functional>
 #include <memory>
+#include <string>
 #include <string_view>
 #include <vector>
-#include <string>
 
 #include "misc.h"
 #include "movepick.h"
@@ -37,6 +37,7 @@
 #include "syzygy/tbprobe.h"
 #include "timeman.h"
 #include "types.h"
+#include "nnue/nnue_accumulator.h"
 
 namespace Stockfish {
 
@@ -301,6 +302,10 @@ class Worker {
 
     Tablebases::Config tbConfig;
 
+    // Used by NNUE
+
+    Eval::NNUE::AccumulatorCaches refreshTable;
+
     const OptionsMap&           options;
     ThreadPool&                 threads;
     TranspositionTable&         tt;