From 7831131591fca89714a376099d6581ec0242244f Mon Sep 17 00:00:00 2001
From: mstembera <m_stembera@yahoo.com>
Date: Thu, 29 Feb 2024 14:27:00 -0800
Subject: [PATCH] Only evaluate the PSQT part of the small net for large evals.

Thanks to Viren6 for suggesting setting complexity to 0 when only the PSQT
part is evaluated.

STC https://tests.stockfishchess.org/tests/view/65d7d6709b2da0226a5a203f
LLR: 2.92 (-2.94,2.94) <0.00,2.00>
Total: 328384 W: 85316 L: 84554 D: 158514
Ptnml(0-2): 1414, 39076, 82486, 39766, 1450

LTC https://tests.stockfishchess.org/tests/view/65dce6d290f639b028a54d2e
LLR: 2.95 (-2.94,2.94) <0.50,2.50>
Total: 165162 W: 41918 L: 41330 D: 81914
Ptnml(0-2): 102, 18332, 45124, 18922, 101

closes https://github.com/official-stockfish/Stockfish/pull/5083

bench: 1504003
---
 src/evaluate.cpp                    |   5 +-
 src/nnue/evaluate_nnue.cpp          |  47 +++---
 src/nnue/evaluate_nnue.h            |   5 +-
 src/nnue/nnue_accumulator.h         |   1 +
 src/nnue/nnue_feature_transformer.h | 252 +++++++++++++++-------------
 src/position.cpp                    |  23 ++-
 6 files changed, 189 insertions(+), 144 deletions(-)
diff --git a/src/evaluate.cpp b/src/evaluate.cpp
index f22c0d06cbe..cd026036b4c 100644
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -194,11 +194,12 @@ Value Eval::evaluate(const Position& pos, int optimism) {
 
     int  simpleEval = simple_eval(pos, pos.side_to_move());
     bool smallNet   = std::abs(simpleEval) > 1050;
+    bool psqtOnly   = std::abs(simpleEval) > 2500;
 
     int nnueComplexity;
 
-    Value nnue = smallNet ? NNUE::evaluate<NNUE::Small>(pos, true, &nnueComplexity)
-                          : NNUE::evaluate<NNUE::Big>(pos, true, &nnueComplexity);
+    Value nnue = smallNet ? NNUE::evaluate<NNUE::Small>(pos, true, &nnueComplexity, psqtOnly)
+                          : NNUE::evaluate<NNUE::Big>(pos, true, &nnueComplexity, false);
 
     // Blend optimism and eval with nnue complexity and material imbalance
     optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / 512;
diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
index 5bd7e83d22f..efcf5b01734 100644
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -179,16 +179,16 @@ write_parameters(std::ostream& stream, NetSize netSize, const std::string& netDe
 
 void hint_common_parent_position(const Position& pos) {
 
-    int simpleEval = simple_eval(pos, pos.side_to_move());
-    if (std::abs(simpleEval) > 1050)
-        featureTransformerSmall->hint_common_access(pos);
+    int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move()));
+    if (simpleEvalAbs > 1050)
+        featureTransformerSmall->hint_common_access(pos, simpleEvalAbs > 2500);
     else
-        featureTransformerBig->hint_common_access(pos);
+        featureTransformerBig->hint_common_access(pos, false);
 }
 
 // Evaluation function. Perform differential calculation.
 template<NetSize Net_Size>
-Value evaluate(const Position& pos, bool adjusted, int* complexity) {
+Value evaluate(const Position& pos, bool adjusted, int* complexity, bool psqtOnly) {
 
     // We manually align the arrays on the stack because with gcc < 9.3
     // overaligning stack variables with alignas() doesn't work correctly.
@@ -213,15 +213,19 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) {
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
 
-    const int  bucket     = (pos.count<ALL_PIECES>() - 1) / 4;
-    const auto psqt       = Net_Size == Small
-                            ? featureTransformerSmall->transform(pos, transformedFeatures, bucket)
-                            : featureTransformerBig->transform(pos, transformedFeatures, bucket);
-    const auto positional = Net_Size == Small ? networkSmall[bucket]->propagate(transformedFeatures)
-                                              : networkBig[bucket]->propagate(transformedFeatures);
+    const int  bucket = (pos.count<ALL_PIECES>() - 1) / 4;
+    const auto psqt =
+      Net_Size == Small
+        ? featureTransformerSmall->transform(pos, transformedFeatures, bucket, psqtOnly)
+        : featureTransformerBig->transform(pos, transformedFeatures, bucket, psqtOnly);
+
+    const auto positional =
+      !psqtOnly ? (Net_Size == Small ? networkSmall[bucket]->propagate(transformedFeatures)
+                                     : networkBig[bucket]->propagate(transformedFeatures))
+                : 0;
 
     if (complexity)
-        *complexity = std::abs(psqt - positional) / OutputScale;
+        *complexity = !psqtOnly ? std::abs(psqt - positional) / OutputScale : 0;
 
     // Give more value to positional evaluation when adjusted flag is set
     if (adjusted)
@@ -231,8 +235,8 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) {
         return static_cast<Value>((psqt + positional) / OutputScale);
 }
 
-template Value evaluate<Big>(const Position& pos, bool adjusted, int* complexity);
-template Value evaluate<Small>(const Position& pos, bool adjusted, int* complexity);
+template Value evaluate<Big>(const Position& pos, bool adjusted, int* complexity, bool psqtOnly);
+template Value evaluate<Small>(const Position& pos, bool adjusted, int* complexity, bool psqtOnly);
 
 struct NnueEvalTrace {
     static_assert(LayerStacks == PSQTBuckets);
@@ -265,8 +269,9 @@ static NnueEvalTrace trace_evaluate(const Position& pos) {
     t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
     for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
     {
-        const auto materialist = featureTransformerBig->transform(pos, transformedFeatures, bucket);
-        const auto positional  = networkBig[bucket]->propagate(transformedFeatures);
+        const auto materialist =
+          featureTransformerBig->transform(pos, transformedFeatures, bucket, false);
+        const auto positional = networkBig[bucket]->propagate(transformedFeatures);
 
         t.psqt[bucket]       = static_cast<Value>(materialist / OutputScale);
         t.positional[bucket] = static_cast<Value>(positional / OutputScale);
@@ -370,16 +375,18 @@ std::string trace(Position& pos) {
                 auto st = pos.state();
 
                 pos.remove_piece(sq);
-                st->accumulatorBig.computed[WHITE] = false;
-                st->accumulatorBig.computed[BLACK] = false;
+                st->accumulatorBig.computed[WHITE]       = st->accumulatorBig.computed[BLACK] =
+                  st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
+                    false;
 
                 Value eval = evaluate<NNUE::Big>(pos);
                 eval       = pos.side_to_move() == WHITE ? eval : -eval;
                 v          = base - eval;
 
                 pos.put_piece(pc, sq);
-                st->accumulatorBig.computed[WHITE] = false;
-                st->accumulatorBig.computed[BLACK] = false;
+                st->accumulatorBig.computed[WHITE]       = st->accumulatorBig.computed[BLACK] =
+                  st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
+                    false;
             }
 
             writeSquare(f, r, pc, v);
diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h
index ea88f890227..c7b378604c5 100644
--- a/src/nnue/evaluate_nnue.h
+++ b/src/nnue/evaluate_nnue.h
@@ -76,7 +76,10 @@ using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
 
 std::string trace(Position& pos);
 template<NetSize Net_Size>
-Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr);
+Value evaluate(const Position& pos,
+               bool            adjusted   = false,
+               int*            complexity = nullptr,
+               bool            psqtOnly   = false);
 void  hint_common_parent_position(const Position& pos);
 
 std::optional<std::string> load_eval(std::istream& stream, NetSize netSize);
diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h
index 0b05d00da28..c0746b4ee86 100644
--- a/src/nnue/nnue_accumulator.h
+++ b/src/nnue/nnue_accumulator.h
@@ -34,6 +34,7 @@ struct alignas(CacheLineSize) Accumulator {
     std::int16_t accumulation[2][Size];
     std::int32_t psqtAccumulation[2][PSQTBuckets];
     bool         computed[2];
+    bool         computedPSQT[2];
 };
 
 }  // namespace Stockfish::Eval::NNUE
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
index 3399b82df6a..b42f160475f 100644
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -250,18 +250,21 @@ class FeatureTransformer {
     }
 
     // Convert input features
-    std::int32_t transform(const Position& pos, OutputType* output, int bucket) const {
-        update_accumulator<WHITE>(pos);
-        update_accumulator<BLACK>(pos);
+    std::int32_t
+    transform(const Position& pos, OutputType* output, int bucket, bool psqtOnly) const {
+        update_accumulator<WHITE>(pos, psqtOnly);
+        update_accumulator<BLACK>(pos, psqtOnly);
 
         const Color perspectives[2]  = {pos.side_to_move(), ~pos.side_to_move()};
-        const auto& accumulation     = (pos.state()->*accPtr).accumulation;
         const auto& psqtAccumulation = (pos.state()->*accPtr).psqtAccumulation;
-
-        const auto psqt =
+        const auto  psqt =
           (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
           / 2;
 
+        if (psqtOnly)
+            return psqt;
+
+        const auto& accumulation = (pos.state()->*accPtr).accumulation;
 
         for (IndexType p = 0; p < 2; ++p)
         {
@@ -312,20 +315,22 @@ class FeatureTransformer {
         return psqt;
     }  // end of function transform()
 
-    void hint_common_access(const Position& pos) const {
-        hint_common_access_for_perspective<WHITE>(pos);
-        hint_common_access_for_perspective<BLACK>(pos);
+    void hint_common_access(const Position& pos, bool psqtOnly) const {
+        hint_common_access_for_perspective<WHITE>(pos, psqtOnly);
+        hint_common_access_for_perspective<BLACK>(pos, psqtOnly);
     }
 
    private:
     template<Color Perspective>
     [[nodiscard]] std::pair<StateInfo*, StateInfo*>
-    try_find_computed_accumulator(const Position& pos) const {
+    try_find_computed_accumulator(const Position& pos, bool psqtOnly) const {
         // Look for a usable accumulator of an earlier position. We keep track
         // of the estimated gain in terms of features to be added/subtracted.
         StateInfo *st = pos.state(), *next = nullptr;
         int        gain = FeatureSet::refresh_cost(pos);
-        while (st->previous && !(st->*accPtr).computed[Perspective])
+        while (st->previous
+               && (!(st->*accPtr).computedPSQT[Perspective]
+                   || (!psqtOnly && !(st->*accPtr).computed[Perspective])))
         {
             // This governs when a full feature refresh is needed and how many
             // updates are better than just one full refresh.
@@ -347,7 +352,8 @@ class FeatureTransformer {
     template<Color Perspective, size_t N>
     void update_accumulator_incremental(const Position& pos,
                                         StateInfo*      computed_st,
-                                        StateInfo*      states_to_update[N]) const {
+                                        StateInfo*      states_to_update[N],
+                                        bool            psqtOnly) const {
         static_assert(N > 0);
         assert(states_to_update[N - 1] == nullptr);
 
@@ -383,7 +389,8 @@ class FeatureTransformer {
 
             for (; i >= 0; --i)
             {
-                (states_to_update[i]->*accPtr).computed[Perspective] = true;
+                (states_to_update[i]->*accPtr).computed[Perspective]     = !psqtOnly;
+                (states_to_update[i]->*accPtr).computedPSQT[Perspective] = true;
 
                 const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1];
 
@@ -403,31 +410,34 @@ class FeatureTransformer {
         {
             assert(states_to_update[0]);
 
-            auto accIn =
-              reinterpret_cast<const vec_t*>(&(st->*accPtr).accumulation[Perspective][0]);
-            auto accOut = reinterpret_cast<vec_t*>(
-              &(states_to_update[0]->*accPtr).accumulation[Perspective][0]);
+            if (!psqtOnly)
+            {
+                auto accIn =
+                  reinterpret_cast<const vec_t*>(&(st->*accPtr).accumulation[Perspective][0]);
+                auto accOut = reinterpret_cast<vec_t*>(
+                  &(states_to_update[0]->*accPtr).accumulation[Perspective][0]);
 
-            const IndexType offsetR0 = HalfDimensions * removed[0][0];
-            auto            columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
-            const IndexType offsetA  = HalfDimensions * added[0][0];
-            auto            columnA  = reinterpret_cast<const vec_t*>(&weights[offsetA]);
+                const IndexType offsetR0 = HalfDimensions * removed[0][0];
+                auto            columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
+                const IndexType offsetA  = HalfDimensions * added[0][0];
+                auto            columnA  = reinterpret_cast<const vec_t*>(&weights[offsetA]);
 
-            if (removed[0].size() == 1)
-            {
-                for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
-                     ++k)
-                    accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
-            }
-            else
-            {
-                const IndexType offsetR1 = HalfDimensions * removed[0][1];
-                auto            columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
+                if (removed[0].size() == 1)
+                {
+                    for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
+                         ++k)
+                        accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]);
+                }
+                else
+                {
+                    const IndexType offsetR1 = HalfDimensions * removed[0][1];
+                    auto            columnR1 = reinterpret_cast<const vec_t*>(&weights[offsetR1]);
 
-                for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
-                     ++k)
-                    accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]),
-                                           vec_add_16(columnR0[k], columnR1[k]));
+                    for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t);
+                         ++k)
+                        accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]),
+                                               vec_add_16(columnR0[k], columnR1[k]));
+                }
             }
 
             auto accPsqtIn =
@@ -461,41 +471,43 @@ class FeatureTransformer {
         }
         else
         {
-            for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
-            {
-                // Load accumulator
-                auto accTileIn = reinterpret_cast<const vec_t*>(
-                  &(st->*accPtr).accumulation[Perspective][j * TileHeight]);
-                for (IndexType k = 0; k < NumRegs; ++k)
-                    acc[k] = vec_load(&accTileIn[k]);
-
-                for (IndexType i = 0; states_to_update[i]; ++i)
+            if (!psqtOnly)
+                for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
                 {
-                    // Difference calculation for the deactivated features
-                    for (const auto index : removed[i])
-                    {
-                        const IndexType offset = HalfDimensions * index + j * TileHeight;
-                        auto            column = reinterpret_cast<const vec_t*>(&weights[offset]);
-                        for (IndexType k = 0; k < NumRegs; ++k)
-                            acc[k] = vec_sub_16(acc[k], column[k]);
-                    }
+                    // Load accumulator
+                    auto accTileIn = reinterpret_cast<const vec_t*>(
+                      &(st->*accPtr).accumulation[Perspective][j * TileHeight]);
+                    for (IndexType k = 0; k < NumRegs; ++k)
+                        acc[k] = vec_load(&accTileIn[k]);
 
-                    // Difference calculation for the activated features
-                    for (const auto index : added[i])
+                    for (IndexType i = 0; states_to_update[i]; ++i)
                     {
-                        const IndexType offset = HalfDimensions * index + j * TileHeight;
-                        auto            column = reinterpret_cast<const vec_t*>(&weights[offset]);
+                        // Difference calculation for the deactivated features
+                        for (const auto index : removed[i])
+                        {
+                            const IndexType offset = HalfDimensions * index + j * TileHeight;
+                            auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+                            for (IndexType k = 0; k < NumRegs; ++k)
+                                acc[k] = vec_sub_16(acc[k], column[k]);
+                        }
+
+                        // Difference calculation for the activated features
+                        for (const auto index : added[i])
+                        {
+                            const IndexType offset = HalfDimensions * index + j * TileHeight;
+                            auto column = reinterpret_cast<const vec_t*>(&weights[offset]);
+                            for (IndexType k = 0; k < NumRegs; ++k)
+                                acc[k] = vec_add_16(acc[k], column[k]);
+                        }
+
+                        // Store accumulator
+                        auto accTileOut =
+                          reinterpret_cast<vec_t*>(&(states_to_update[i]->*accPtr)
+                                                      .accumulation[Perspective][j * TileHeight]);
                         for (IndexType k = 0; k < NumRegs; ++k)
-                            acc[k] = vec_add_16(acc[k], column[k]);
+                            vec_store(&accTileOut[k], acc[k]);
                     }
-
-                    // Store accumulator
-                    auto accTileOut = reinterpret_cast<vec_t*>(
-                      &(states_to_update[i]->*accPtr).accumulation[Perspective][j * TileHeight]);
-                    for (IndexType k = 0; k < NumRegs; ++k)
-                        vec_store(&accTileOut[k], acc[k]);
                 }
-            }
 
             for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
             {
@@ -537,8 +549,10 @@ class FeatureTransformer {
 #else
         for (IndexType i = 0; states_to_update[i]; ++i)
         {
-            std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective],
-                        (st->*accPtr).accumulation[Perspective], HalfDimensions * sizeof(BiasType));
+            if (!psqtOnly)
+                std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective],
+                            (st->*accPtr).accumulation[Perspective],
+                            HalfDimensions * sizeof(BiasType));
 
             for (std::size_t k = 0; k < PSQTBuckets; ++k)
                 (states_to_update[i]->*accPtr).psqtAccumulation[Perspective][k] =
@@ -549,10 +563,12 @@ class FeatureTransformer {
             // Difference calculation for the deactivated features
             for (const auto index : removed[i])
             {
-                const IndexType offset = HalfDimensions * index;
-
-                for (IndexType j = 0; j < HalfDimensions; ++j)
-                    (st->*accPtr).accumulation[Perspective][j] -= weights[offset + j];
+                if (!psqtOnly)
+                {
+                    const IndexType offset = HalfDimensions * index;
+                    for (IndexType j = 0; j < HalfDimensions; ++j)
+                        (st->*accPtr).accumulation[Perspective][j] -= weights[offset + j];
+                }
 
                 for (std::size_t k = 0; k < PSQTBuckets; ++k)
                     (st->*accPtr).psqtAccumulation[Perspective][k] -=
@@ -562,10 +578,12 @@ class FeatureTransformer {
             // Difference calculation for the activated features
             for (const auto index : added[i])
             {
-                const IndexType offset = HalfDimensions * index;
-
-                for (IndexType j = 0; j < HalfDimensions; ++j)
-                    (st->*accPtr).accumulation[Perspective][j] += weights[offset + j];
+                if (!psqtOnly)
+                {
+                    const IndexType offset = HalfDimensions * index;
+                    for (IndexType j = 0; j < HalfDimensions; ++j)
+                        (st->*accPtr).accumulation[Perspective][j] += weights[offset + j];
+                }
 
                 for (std::size_t k = 0; k < PSQTBuckets; ++k)
                     (st->*accPtr).psqtAccumulation[Perspective][k] +=
@@ -576,7 +594,7 @@ class FeatureTransformer {
     }
 
     template<Color Perspective>
-    void update_accumulator_refresh(const Position& pos) const {
+    void update_accumulator_refresh(const Position& pos, bool psqtOnly) const {
 #ifdef VECTOR
         // Gcc-10.2 unnecessarily spills AVX2 registers if this array
         // is defined in the VECTOR code below, once in each branch
@@ -587,32 +605,34 @@ class FeatureTransformer {
         // Refresh the accumulator
         // Could be extracted to a separate function because it's done in 2 places,
         // but it's unclear if compilers would correctly handle register allocation.
-        auto& accumulator                 = pos.state()->*accPtr;
-        accumulator.computed[Perspective] = true;
+        auto& accumulator                     = pos.state()->*accPtr;
+        accumulator.computed[Perspective]     = !psqtOnly;
+        accumulator.computedPSQT[Perspective] = true;
         FeatureSet::IndexList active;
         FeatureSet::append_active_indices<Perspective>(pos, active);
 
 #ifdef VECTOR
-        for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
-        {
-            auto biasesTile = reinterpret_cast<const vec_t*>(&biases[j * TileHeight]);
-            for (IndexType k = 0; k < NumRegs; ++k)
-                acc[k] = biasesTile[k];
-
-            for (const auto index : active)
+        if (!psqtOnly)
+            for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
             {
-                const IndexType offset = HalfDimensions * index + j * TileHeight;
-                auto            column = reinterpret_cast<const vec_t*>(&weights[offset]);
+                auto biasesTile = reinterpret_cast<const vec_t*>(&biases[j * TileHeight]);
+                for (IndexType k = 0; k < NumRegs; ++k)
+                    acc[k] = biasesTile[k];
 
-                for (unsigned k = 0; k < NumRegs; ++k)
-                    acc[k] = vec_add_16(acc[k], column[k]);
-            }
+                for (const auto index : active)
+                {
+                    const IndexType offset = HalfDimensions * index + j * TileHeight;
+                    auto            column = reinterpret_cast<const vec_t*>(&weights[offset]);
 
-            auto accTile =
-              reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
-            for (unsigned k = 0; k < NumRegs; k++)
-                vec_store(&accTile[k], acc[k]);
-        }
+                    for (unsigned k = 0; k < NumRegs; ++k)
+                        acc[k] = vec_add_16(acc[k], column[k]);
+                }
+
+                auto accTile =
+                  reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
+                for (unsigned k = 0; k < NumRegs; k++)
+                    vec_store(&accTile[k], acc[k]);
+            }
 
         for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
         {
@@ -635,18 +655,21 @@ class FeatureTransformer {
         }
 
 #else
-        std::memcpy(accumulator.accumulation[Perspective], biases,
-                    HalfDimensions * sizeof(BiasType));
+        if (!psqtOnly)
+            std::memcpy(accumulator.accumulation[Perspective], biases,
+                        HalfDimensions * sizeof(BiasType));
 
         for (std::size_t k = 0; k < PSQTBuckets; ++k)
             accumulator.psqtAccumulation[Perspective][k] = 0;
 
         for (const auto index : active)
         {
-            const IndexType offset = HalfDimensions * index;
-
-            for (IndexType j = 0; j < HalfDimensions; ++j)
-                accumulator.accumulation[Perspective][j] += weights[offset + j];
+            if (!psqtOnly)
+            {
+                const IndexType offset = HalfDimensions * index;
+                for (IndexType j = 0; j < HalfDimensions; ++j)
+                    accumulator.accumulation[Perspective][j] += weights[offset + j];
+            }
 
             for (std::size_t k = 0; k < PSQTBuckets; ++k)
                 accumulator.psqtAccumulation[Perspective][k] +=
@@ -656,7 +679,7 @@ class FeatureTransformer {
     }
 
     template<Color Perspective>
-    void hint_common_access_for_perspective(const Position& pos) const {
+    void hint_common_access_for_perspective(const Position& pos, bool psqtOnly) const {
 
         // Works like update_accumulator, but performs less work.
         // Updates ONLY the accumulator for pos.
@@ -664,27 +687,31 @@ class FeatureTransformer {
         // Look for a usable accumulator of an earlier position. We keep track
         // of the estimated gain in terms of features to be added/subtracted.
         // Fast early exit.
-        if ((pos.state()->*accPtr).computed[Perspective])
+        if ((pos.state()->*accPtr).computed[Perspective]
+            || (psqtOnly && (pos.state()->*accPtr).computedPSQT[Perspective]))
             return;
 
-        auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos);
+        auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos, psqtOnly);
 
-        if ((oldest_st->*accPtr).computed[Perspective])
+        if ((oldest_st->*accPtr).computed[Perspective]
+            || (psqtOnly && (oldest_st->*accPtr).computedPSQT[Perspective]))
         {
             // Only update current position accumulator to minimize work.
             StateInfo* states_to_update[2] = {pos.state(), nullptr};
-            update_accumulator_incremental<Perspective, 2>(pos, oldest_st, states_to_update);
+            update_accumulator_incremental<Perspective, 2>(pos, oldest_st, states_to_update,
+                                                           psqtOnly);
         }
         else
-            update_accumulator_refresh<Perspective>(pos);
+            update_accumulator_refresh<Perspective>(pos, psqtOnly);
     }
 
     template<Color Perspective>
-    void update_accumulator(const Position& pos) const {
+    void update_accumulator(const Position& pos, bool psqtOnly) const {
 
-        auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos);
+        auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos, psqtOnly);
 
-        if ((oldest_st->*accPtr).computed[Perspective])
+        if ((oldest_st->*accPtr).computed[Perspective]
+            || (psqtOnly && (oldest_st->*accPtr).computedPSQT[Perspective]))
         {
             if (next == nullptr)
                 return;
@@ -697,12 +724,11 @@ class FeatureTransformer {
             StateInfo* states_to_update[3] = {next, next == pos.state() ? nullptr : pos.state(),
                                               nullptr};
 
-            update_accumulator_incremental<Perspective, 3>(pos, oldest_st, states_to_update);
+            update_accumulator_incremental<Perspective, 3>(pos, oldest_st, states_to_update,
+                                                           psqtOnly);
         }
         else
-        {
-            update_accumulator_refresh<Perspective>(pos);
-        }
+            update_accumulator_refresh<Perspective>(pos, psqtOnly);
     }
 
     alignas(CacheLineSize) BiasType biases[HalfDimensions];
diff --git a/src/position.cpp b/src/position.cpp
index c89b1eb0889..2263afe7669 100644
--- a/src/position.cpp
+++ b/src/position.cpp
@@ -680,10 +680,14 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
     ++st->pliesFromNull;
 
     // Used by NNUE
-    st->accumulatorBig.computed[WHITE]     = st->accumulatorBig.computed[BLACK] =
-      st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
-    auto& dp                                                                      = st->dirtyPiece;
-    dp.dirty_num                                                                  = 1;
+    st->accumulatorBig.computed[WHITE]             = st->accumulatorBig.computed[BLACK] =
+      st->accumulatorBig.computedPSQT[WHITE]       = st->accumulatorBig.computedPSQT[BLACK] =
+        st->accumulatorSmall.computed[WHITE]       = st->accumulatorSmall.computed[BLACK] =
+          st->accumulatorSmall.computedPSQT[WHITE] = st->accumulatorSmall.computedPSQT[BLACK] =
+            false;
+
+    auto& dp     = st->dirtyPiece;
+    dp.dirty_num = 1;
 
     Color  us       = sideToMove;
     Color  them     = ~us;
@@ -965,10 +969,13 @@ void Position::do_null_move(StateInfo& newSt, TranspositionTable& tt) {
     newSt.previous = st;
     st             = &newSt;
 
-    st->dirtyPiece.dirty_num               = 0;
-    st->dirtyPiece.piece[0]                = NO_PIECE;  // Avoid checks in UpdateAccumulator()
-    st->accumulatorBig.computed[WHITE]     = st->accumulatorBig.computed[BLACK] =
-      st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] = false;
+    st->dirtyPiece.dirty_num                 = 0;
+    st->dirtyPiece.piece[0]                  = NO_PIECE;  // Avoid checks in UpdateAccumulator()
+    st->accumulatorBig.computed[WHITE]       = st->accumulatorBig.computed[BLACK] =
+      st->accumulatorBig.computedPSQT[WHITE] = st->accumulatorBig.computedPSQT[BLACK] =
+        st->accumulatorSmall.computed[WHITE] = st->accumulatorSmall.computed[BLACK] =
+          st->accumulatorSmall.computedPSQT[WHITE] = st->accumulatorSmall.computedPSQT[BLACK] =
+            false;
 
     if (st->epSquare != SQ_NONE)
     {