Skip to content

Commit

Permalink
lz4+zstd compression (near#6365)
Browse files Browse the repository at this point in the history
* Replace the default Snappy compression in RocksDB with lz4 on all levels
except the bottommost one, which uses zstd.

We aren't going to recompress existing node data explicitly. We
expect RPC nodes to be almost fully updated within 5 epochs. For archival
nodes we don't have an estimate of what fraction of the data will be
recompressed over time. Later on, if it becomes critical, we'll probably
make it possible in the CLI to recompress the whole DB in place.

The experiment was done in the following manner: we read all
key/value pairs from the store and wrote them into a new store, once with
the current compression algorithm (snappy) and once with lz4+zstd.

All sizes in GB.

                                 ______RPC node from scratch______  ______Archival  node from backup_____
#  column                        snappy  lz4+zstd  Saved            snappy   lz4+zstd  Saved
0  ColDbVersion                    0.00    0.00     0.00    -7.12%     0.00     0.00    0.00     -5.16%
1  ColBlockMisc                    0.00    0.00     0.00    64.66%     0.00     0.00    0.00     83.78%
2  ColBlock                        0.83    0.76     0.07     8.71%   129.74   116.06   13.68     11.79%
3  ColBlockHeader                108.79  108.90    -0.10    -0.09%   106.62   106.71   -0.10     -0.09%
4  ColBlockHeight                  1.62    1.48     0.14     9.44%     1.57     1.43    0.14      9.49%
5  ColState                       27.62   22.53     5.09    22.58%   348.80   295.61   53.20     18.00%
6  ColChunkExtra                   0.05    0.09    -0.04   -40.59%     6.47     5.79    0.68     11.83%
7  ColTransactionResult            1.89    1.59     0.30    18.83%    18.13    14.91    3.22     21.59%
8  ColOutgoingReceipts             1.02    0.89     0.12    13.97%    79.57    67.93   11.64     17.13%
9  ColIncomingReceipts             0.94    0.93     0.01     1.60%    83.07    70.38   12.68     18.02%
10 ColPeers                        0.00    0.00     0.00   -33.65%     0.00     0.00    0.00     -8.83%
11 ColEpochInfo                    0.01    0.02    -0.01   -67.64%     0.00     0.00    0.00      2.26%
12 ColBlockInfo                    8.34    6.95     1.39    19.92%     8.23     6.91    1.32     19.07%
13 ColChunks                       2.53    2.08     0.45    21.37%   199.69   165.51   34.18     20.65%
14 ColPartialChunks               21.14   18.02     3.13    17.35%  1634.03  1138.52  495.51     43.52%
15 ColBlocksToCatchup              0.00    0.00     0.00     0.00%     0.00     0.00    0.00     10.50%
16 ColStateDlInfos                 0.00    0.00     0.00     0.00%     0.00     0.00    0.00  75561.59%
17 ColChallengedBlocks             0.00    0.00     0.00     0.00%     0.00     0.00    0.00      0.00%
18 ColStateHeaders                 0.00    0.00     0.00   -21.17%     0.00     0.00    0.00      8.61%
19 ColInvalidChunks                0.00    0.00     0.00     0.00%     0.00     0.00    0.00      0.00%
20 ColBlockExtra                   0.01    0.01     0.00   -31.27%     1.44     1.36    0.07      5.39%
21 ColBlockPerHeight               0.01    0.02    -0.01   -61.46%     2.94     2.11    0.84     39.65%
22 ColStateParts                   2.09    4.49    -2.41   -53.56%    67.27    39.38   27.89     70.83%
23 ColEpochStart                   0.00    0.00     0.00   -20.03%     0.00     0.00    0.00   2086.55%
24 ColAccountAnnouncements         0.00    0.00     0.00   -27.72%     0.00     0.00    0.00     28.02%
25 ColNextBlockHashes              2.96    2.77     0.19     6.74%     2.91     2.71    0.20      7.43%
26 ColEpochLightClientBlocks       0.00    0.00     0.00    -5.69%     0.01     0.00    0.00     38.29%
27 ColReceiptIdToShardId           0.11    0.15    -0.04   -26.61%     1.86     1.70    0.16      9.12%
28 _ColNextBlockWithNewChunk       0.00    0.00     0.00     0.00%     0.00     0.00    0.00      0.00%
29 _ColLastBlockWithNewChunk       0.00    0.00     0.00     0.00%     0.00     0.00    0.00      0.00%
30 ColPeerComponent                0.00    0.00     0.00    59.15%     0.00     0.00    0.00    950.57%
31 ColComponentEdges               0.06    0.03     0.03    98.83%     0.01     0.00    0.01   4094.76%
32 ColLastComponentNonce           0.00    0.00     0.00    89.72%     0.00     0.00    0.00     87.31%
33 ColTransactions                 1.45    1.22     0.23    18.71%   103.24    82.58   20.66     25.02%
34 ColChunkPerHeightShard          0.03    0.06    -0.02   -40.11%     3.17     2.91    0.26      9.03%
35 ColStateChanges                 1.55    1.37     0.18    13.51%    58.67    34.82   23.85     68.51%
36 ColBlockRefCount                0.01    0.01     0.00   -30.37%     1.48     1.36    0.12      8.62%
37 ColTrieChanges                 29.74   16.93    12.82    75.72%   466.22   339.68  126.54     37.25%
38 ColBlockMerkleTree             17.02   12.72     4.30    33.77%    16.97    12.42    4.55     36.66%
39 ColChunkHashesByHeight          0.02    0.02     0.00    -2.57%     2.97     2.73    0.23      8.57%
40 ColBlockOrdinal                 1.64    1.47     0.18    11.95%     1.58     1.42    0.16     11.24%
41 ColGCCount                      0.00    0.00     0.00    27.82%     0.00     0.00    0.00      0.00%
42 ColOutcomeIds                   0.17    0.17     0.00    -1.40%     3.95     3.73    0.22      5.85%
43 _ColTransactionRefCount         0.00    0.00     0.00     0.00%     0.00     0.00    0.00      0.00%
44 ColProcessedBlockHeights        0.00    0.00     0.00   -87.51%     0.17     0.13    0.04     28.28%
45 ColReceipts                     1.04    0.87     0.17    19.63%    79.79    67.56   12.23     18.11%
46 ColCachedContractCode          15.57    8.28     7.29    88.10%    32.84    16.57   16.27     98.18%
47 ColEpochValidatorInfo           0.00    0.01     0.00   -52.36%     0.00     0.00    0.00     -1.57%
48 ColHeaderHashesByHeight         1.66    1.85    -0.19   -10.51%     1.47     1.31    0.16     12.08%
49 ColStateChangesForSplitStates   0         0      0.00     0.00%     0.00     0.00    0.00      0.00%
_  sum                           249.93  216.69    33.24    15.34%  3464.90  2604.28  860.62     33.05%
Perf report:

export CARGO_PROFILE_RELEASE_LTO=fat
export CARGO_PROFILE_RELEASE_CODEGEN_UNITS=1
cargo build --release -p runtime-params-estimator

8 runs in 4 parallel threads:
target/release/runtime-params-estimator --metric time --costs RocksDbReadValueByte,RocksDbInsertValueByte\
    --home ~/.near:

RocksDbInsertValueByte 4_015_512 gas [ 4ns ] (computed in 22.384337452s)
RocksDbInsertValueByte 5_102_943 gas [ 5ns ] (computed in 23.08667252s)
RocksDbInsertValueByte 6_050_856 gas [ 6ns ] (computed in 23.277226336s)
RocksDbInsertValueByte 6_965_742 gas [ 6ns ] (computed in 23.63915479s)
RocksDbInsertValueByte 7_975_674 gas [ 7ns ] (computed in 21.727054944s)
RocksDbInsertValueByte 8_592_293 gas [ 8ns ] (computed in 23.341369568s)
RocksDbInsertValueByte 9_545_668 gas [ 9ns ] (computed in 25.831370918s)
RocksDbInsertValueByte 9_863_761 gas [ 9ns ] (computed in 24.980539953s)

RocksDbReadValueByte   6_637_802 gas [ 6ns ] (computed in 24.112692062s)
RocksDbReadValueByte   6_836_660 gas [ 6ns ] (computed in 20.590620688s)
RocksDbReadValueByte   7_030_828 gas [ 7ns ] (computed in 23.016204945s)
RocksDbReadValueByte   8_138_892 gas [ 8ns ] (computed in 22.800273225s)
RocksDbReadValueByte   8_629_815 gas [ 8ns ] (computed in 21.172093785s)
RocksDbReadValueByte   9_282_138 gas [ 9ns ] (computed in 22.364099438s)
RocksDbReadValueByte   9_309_050 gas [ 9ns ] (computed in 21.910096242s)

export CARGO_PROFILE_RELEASE_LTO=fat
export CARGO_PROFILE_RELEASE_CODEGEN_UNITS=1
cargo build --release -p runtime-params-estimator --features required

8 runs in 4 parallel threads:
target/release/runtime-params-estimator --metric time --costs RocksDbReadValueByte,RocksDbInsertValueByte\
    --home ~/.near:

RocksDbInsertValueByte 4_704_241 gas [ 4ns ] (computed in 24.334580544s)
RocksDbInsertValueByte 5_025_573 gas [ 5ns ] (computed in 24.561436598s)
RocksDbInsertValueByte 6_562_665 gas [ 6ns ] (computed in 24.517880931s)
RocksDbInsertValueByte 7_859_404 gas [ 7ns ] (computed in 24.531307348s)
RocksDbInsertValueByte 8_055_810 gas [ 8ns ] (computed in 27.72911072s)
RocksDbInsertValueByte 8_998_820 gas [ 8ns ] (computed in 26.002797888s)
RocksDbInsertValueByte 9_044_929 gas [ 9ns ] (computed in 23.025768285s)
RocksDbInsertValueByte 9_232_981 gas [ 9ns ] (computed in 23.33538747s)

RocksDbReadValueByte   6_704_198 gas [ 6ns ] (computed in 25.506228033s)
RocksDbReadValueByte   6_718_421 gas [ 6ns ] (computed in 25.438596596s)
RocksDbReadValueByte   7_174_273 gas [ 7ns ] (computed in 25.390843064s)
RocksDbReadValueByte   7_337_272 gas [ 7ns ] (computed in 24.012619703s)
RocksDbReadValueByte   7_358_555 gas [ 7ns ] (computed in 25.215203317s)
RocksDbReadValueByte   8_112_395 gas [ 8ns ] (computed in 23.937673994s)
RocksDbReadValueByte   8_929_693 gas [ 8ns ] (computed in 27.093712441s)
RocksDbReadValueByte   8_964_181 gas [ 8ns ] (computed in 28.158183615s)
  • Loading branch information
EdvardD committed Mar 3, 2022
1 parent 07f91c7 commit 0197c40
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,18 @@

* Enable access key nonce range for implicit accounts to prevent tx hash collisions [#5482](https://github.com/near/nearcore/pull/5482)

### Non-protocol Changes

* Switch to LZ4+ZSTD compression from Snappy in RocksDB [#6365](https://github.com/near/nearcore/pull/6365)

## `1.23.0` [13-12-2021]

### Protocol Changes

* Further lower regular_op_cost from 2_207_874 to 822_756.
* Limit number of wasm functions in one contract to 10_000. [#4954](https://github.com/near/nearcore/pull/4954)
* Add block header v3, required by new validator selection algorithm
* Move to new validator selection and sampling algorithm. Now we would be able to use all available seats. First step to enable chunk only producers.
* Move to new validator selection and sampling algorithm. Now we would be able to use all available seats. First step to enable chunk only producers.

### Non-protocol Changes

Expand Down
24 changes: 23 additions & 1 deletion core/store/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -759,10 +759,32 @@ impl Database for TestDB {
}
}

/// Applies the store's compression settings to `opts`: LZ4 as the general
/// compression type and ZSTD with a trained dictionary for the bottommost level.
fn set_compression_options(opts: &mut Options) {
    // RocksDB documentation says that 16KB is a typical dictionary size.
    // We've empirically tuned the dictionary size to twice that 'typical' size.
    // RocksDB recommends a training data size of 100x the dictionary size.
    // See: https://rocksdb.org/blog/2021/05/31/dictionary-compression.html?utm_source=dbplatz
    let typical_dict_bytes = 16384;
    let max_dict_bytes = 2 * typical_dict_bytes;
    let zstd_train_bytes = max_dict_bytes * 100;

    // The remaining bottommost options are RocksDB's defaults:
    //   * window_bits = -14: unused here (Zlib-specific parameter);
    //   * compression_level = 32767: the default compression level for ZSTD;
    //   * compression_strategy = 0: unused here (Zlib-specific parameter).
    // See: https://github.com/facebook/rocksdb/blob/main/include/rocksdb/advanced_options.h#L176
    let window_bits = -14;
    let compression_level = 32767;
    let compression_strategy = 0;

    opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
    opts.set_bottommost_compression_type(rocksdb::DBCompressionType::Zstd);
    opts.set_bottommost_compression_options(
        window_bits,
        compression_level,
        compression_strategy,
        max_dict_bytes,
        /*enabled */ true,
    );
    opts.set_bottommost_zstd_max_train_bytes(zstd_train_bytes, /*enabled */ true);
}

/// DB level options
fn rocksdb_options() -> Options {
let mut opts = Options::default();

set_compression_options(&mut opts);
opts.create_missing_column_families(true);
opts.create_if_missing(true);
opts.set_use_fsync(false);
Expand Down Expand Up @@ -817,12 +839,12 @@ fn choose_cache_size(col: DBCol) -> usize {

fn rocksdb_column_options(col: DBCol) -> Options {
let mut opts = Options::default();
set_compression_options(&mut opts);
opts.set_level_compaction_dynamic_level_bytes(true);
let cache_size = choose_cache_size(col);
opts.set_block_based_table_factory(&rocksdb_block_based_options(cache_size));
opts.optimize_level_style_compaction(128 * bytesize::MIB as usize);
opts.set_target_file_size_base(64 * bytesize::MIB);
opts.set_compression_per_level(&[]);
if col.is_rc() {
opts.set_merge_operator("refcount merge", RocksDB::refcount_merge, RocksDB::refcount_merge);
opts.set_compaction_filter("empty value filter", RocksDB::empty_value_compaction_filter);
Expand Down

0 comments on commit 0197c40

Please sign in to comment.