Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement accumulator refresh table #5183

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions src/evaluate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@
#include <iomanip>
#include <iostream>
#include <sstream>
#include <memory>

#include "nnue/network.h"
#include "nnue/nnue_misc.h"
#include "position.h"
#include "types.h"
#include "uci.h"
#include "nnue/nnue_accumulator.h"

namespace Stockfish {

Expand All @@ -45,7 +47,10 @@ int Eval::simple_eval(const Position& pos, Color c) {

// Evaluate is the evaluator for the outer world. It returns a static evaluation
// of the position from the point of view of the side to move.
Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos, int optimism) {
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
const Position& pos,
Eval::NNUE::AccumulatorCaches& caches,
int optimism) {

assert(!pos.checkers());

Expand All @@ -55,8 +60,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
int nnueComplexity;
int v;

Value nnue = smallNet ? networks.small.evaluate(pos, true, &nnueComplexity, psqtOnly)
: networks.big.evaluate(pos, true, &nnueComplexity, false);
Value nnue = smallNet ? networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly)
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);

const auto adjustEval = [&](int optDiv, int nnueDiv, int pawnCountConstant, int pawnCountMul,
int npmConstant, int evalDiv, int shufflingConstant,
Expand Down Expand Up @@ -94,20 +99,22 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
// Trace scores are from white's point of view
std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {

auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>();

if (pos.checkers())
return "Final evaluation: none (in check)";

std::stringstream ss;
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
ss << '\n' << NNUE::trace(pos, networks) << '\n';
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';

ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);

Value v = networks.big.evaluate(pos, false);
Value v = networks.big.evaluate(pos, &caches->big, false);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";

v = evaluate(networks, pos, VALUE_ZERO);
v = evaluate(networks, pos, *caches, VALUE_ZERO);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
ss << " [with scaled NNUE, ...]";
Expand Down
8 changes: 5 additions & 3 deletions src/evaluate.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,16 @@ constexpr inline int SmallNetThreshold = 1274, PsqtOnlyThreshold = 2389;

namespace NNUE {
struct Networks;
struct AccumulatorCaches;
}

std::string trace(Position& pos, const Eval::NNUE::Networks& networks);

int simple_eval(const Position& pos, Color c);
Value evaluate(const NNUE::Networks& networks, const Position& pos, int optimism);


Value evaluate(const NNUE::Networks& networks,
const Position& pos,
Eval::NNUE::AccumulatorCaches& caches,
int optimism);
} // namespace Eval

} // namespace Stockfish
Expand Down
4 changes: 3 additions & 1 deletion src/nnue/features/half_ka_v2_hm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "../../bitboard.h"
#include "../../position.h"
#include "../../types.h"
#include "../nnue_common.h"
#include "../nnue_accumulator.h"

namespace Stockfish::Eval::NNUE::Features {

Expand All @@ -49,6 +49,8 @@ void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active)
// Explicit template instantiations
template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);

// Get a list of indices for recently changed features
template<Color Perspective>
Expand Down
8 changes: 4 additions & 4 deletions src/nnue/features/half_ka_v2_hm.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ class HalfKAv2_hm {
{PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};

// Index of a feature for a given king position and another piece on some square
template<Color Perspective>
static IndexType make_index(Square s, Piece pc, Square ksq);

public:
// Feature name
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
Expand Down Expand Up @@ -126,6 +122,10 @@ class HalfKAv2_hm {
static constexpr IndexType MaxActiveDimensions = 32;
using IndexList = ValueList<IndexType, MaxActiveDimensions>;

// Index of a feature for a given king position and another piece on some square
template<Color Perspective>
static IndexType make_index(Square s, Piece pc, Square ksq);

// Get a list of indices for active features
template<Color Perspective>
static void append_active_indices(const Position& pos, IndexList& active);
Expand Down
45 changes: 25 additions & 20 deletions src/nnue/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,31 +186,33 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename


template<typename Arch, typename Transformer>
Value Network<Arch, Transformer>::evaluate(const Position& pos,
bool adjusted,
int* complexity,
bool psqtOnly) const {
Value Network<Arch, Transformer>::evaluate(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache,
bool adjusted,
int* complexity,
bool psqtOnly) const {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.

constexpr uint64_t alignment = CacheLineSize;
constexpr int delta = 24;

#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformedFeaturesUnaligned
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
+ alignment / sizeof(TransformedFeatureType)];
TransformedFeatureType
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
+ alignment / sizeof(TransformedFeatureType)];

auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
#else
alignas(alignment) TransformedFeatureType transformedFeatures
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
alignas(alignment) TransformedFeatureType
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
#endif

ASSERT_ALIGNED(transformedFeatures, alignment);

const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket, psqtOnly);
const auto psqt =
featureTransformer->transform(pos, cache, transformedFeatures, bucket, psqtOnly);
const auto positional = !psqtOnly ? (network[bucket]->propagate(transformedFeatures)) : 0;

if (complexity)
Expand Down Expand Up @@ -255,26 +257,29 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {


template<typename Arch, typename Transformer>
void Network<Arch, Transformer>::hint_common_access(const Position& pos, bool psqtOnl) const {
featureTransformer->hint_common_access(pos, psqtOnl);
void Network<Arch, Transformer>::hint_common_access(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache,
bool psqtOnl) const {
featureTransformer->hint_common_access(pos, cache, psqtOnl);
}


template<typename Arch, typename Transformer>
NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) const {
NnueEvalTrace
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache) const {
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.
constexpr uint64_t alignment = CacheLineSize;

#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformedFeaturesUnaligned
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
+ alignment / sizeof(TransformedFeatureType)];
TransformedFeatureType
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
+ alignment / sizeof(TransformedFeatureType)];

auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
#else
alignas(alignment) TransformedFeatureType transformedFeatures
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
alignas(alignment) TransformedFeatureType
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
#endif

ASSERT_ALIGNED(transformedFeatures, alignment);
Expand All @@ -284,7 +289,7 @@ NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) co
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
{
const auto materialist =
featureTransformer->transform(pos, transformedFeatures, bucket, false);
featureTransformer->transform(pos, cache, transformedFeatures, bucket, false);
const auto positional = network[bucket]->propagate(transformedFeatures);

t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
Expand Down
24 changes: 16 additions & 8 deletions src/nnue/network.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@
#include "nnue_architecture.h"
#include "nnue_feature_transformer.h"
#include "nnue_misc.h"
#include "nnue_accumulator.h"

namespace Stockfish::Eval::NNUE {


enum class EmbeddedNNUEType {
BIG,
SMALL,
Expand All @@ -43,6 +43,8 @@ enum class EmbeddedNNUEType {

template<typename Arch, typename Transformer>
class Network {
static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;

public:
Network(EvalFile file, EmbeddedNNUEType type) :
evalFile(file),
Expand All @@ -51,17 +53,20 @@ class Network {
void load(const std::string& rootDirectory, std::string evalfilePath);
bool save(const std::optional<std::string>& filename) const;

Value evaluate(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache,
bool adjusted = false,
int* complexity = nullptr,
bool psqtOnly = false) const;

Value evaluate(const Position& pos,
bool adjusted = false,
int* complexity = nullptr,
bool psqtOnly = false) const;


void hint_common_access(const Position& pos, bool psqtOnl) const;
void hint_common_access(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache,
bool psqtOnl) const;

void verify(std::string evalfilePath) const;
NnueEvalTrace trace_evaluate(const Position& pos) const;
NnueEvalTrace trace_evaluate(const Position& pos,
AccumulatorCaches::Cache<FTDimensions>* cache) const;

private:
void load_user_net(const std::string&, const std::string&);
Expand Down Expand Up @@ -89,6 +94,9 @@ class Network {

// Hash value of evaluation function structure
static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();

template<IndexType Size>
friend struct AccumulatorCaches::Cache;
};

// Definitions of the network types
Expand Down
70 changes: 66 additions & 4 deletions src/nnue/nnue_accumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,75 @@

namespace Stockfish::Eval::NNUE {

using BiasType = std::int16_t;
using PSQTWeightType = std::int32_t;
using IndexType = std::uint32_t;

// Class that holds the result of affine transformation of input features
template<IndexType Size>
struct alignas(CacheLineSize) Accumulator {
std::int16_t accumulation[2][Size];
std::int32_t psqtAccumulation[2][PSQTBuckets];
bool computed[2];
bool computedPSQT[2];
std::int16_t accumulation[COLOR_NB][Size];
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
bool computed[COLOR_NB];
bool computedPSQT[COLOR_NB];
};


// AccumulatorCaches bundles the per-thread accumulator refresh caches.
// Each cache keeps one entry per possible king square. When an accumulator
// has to be refreshed, the matching entry serves as the starting point, so
// the accumulator is updated from the cached state rather than rebuilt from
// scratch. The idea was first described by Luecx (author of Koivisto) and
// is commonly referred to as "Finny Tables".
struct AccumulatorCaches {

    // Refresh cache for a feature transformer with `Size` output dimensions.
    template<IndexType Size>
    struct alignas(CacheLineSize) Cache {

        // One cached accumulator together with the bitboards it was built from.
        struct alignas(CacheLineSize) Entry {
            BiasType       accumulation[COLOR_NB][Size];
            PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
            Bitboard       byColorBB[COLOR_NB][COLOR_NB];
            Bitboard       byTypeBB[COLOR_NB][PIECE_TYPE_NB];

            // Resets the entry to the "empty board" state: the accumulation
            // holds the plain biases with no weights added on top, and all
            // bitboards as well as the PSQT accumulation are zeroed.
            void clear(const BiasType* biases) {

                std::memcpy(accumulation[WHITE], biases, sizeof(accumulation[WHITE]));
                std::memcpy(accumulation[BLACK], biases, sizeof(accumulation[BLACK]));

                std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation));

                std::memset(byTypeBB, 0, sizeof(byTypeBB));
                std::memset(byColorBB, 0, sizeof(byColorBB));
            }
        };

        // Resets every entry using the biases of the given network's
        // feature transformer.
        template<typename Network>
        void clear(const Network& network) {
            // Bind to a pointer-to-const first so that the call below resolves
            // to the non-template overload rather than back to this template.
            const BiasType* networkBiases = network.featureTransformer->biases;
            clear(networkBiases);
        }

        // Resets every entry using the given biases.
        void clear(const BiasType* biases) {
            for (Entry& cached : entries)
                cached.clear(biases);
        }

        // Access to the entry associated with a square.
        Entry& operator[](Square sq) {
            return entries[sq];
        }

        std::array<Entry, SQUARE_NB> entries;
    };

    // Resets all caches from the corresponding networks.
    template<typename Networks>
    void clear(const Networks& networks) {
        big.clear(networks.big);
    }

    // When adding a new cache for a network, i.e. the smallnet
    // the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh.
    Cache<TransformedFeatureDimensionsBig> big;
};

} // namespace Stockfish::Eval::NNUE
Expand Down
Loading