Skip to content

Commit

Permalink
Merge pull request #13 from josehu07/crossword
Browse files Browse the repository at this point in the history
Cumulated updates to benchmarking & related features
  • Loading branch information
josehu07 committed Nov 12, 2023
2 parents 0e6afca + 6775db1 commit eab0934
Show file tree
Hide file tree
Showing 96 changed files with 19,391 additions and 14,660 deletions.
588 changes: 576 additions & 12 deletions Cargo.lock

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ authors = ["Guanzhou Jose Hu <huguanzhou123@gmail.com>"]

[dependencies]
async-trait = "0.1"
fixedbitset = "0.4"
fixedbitset = { version = "0.4", features = ["serde"] }
rangemap = "1.4"
flashmap = "0.1"
bytes = { version = "1.4", features = ["serde"] }
futures = "0.3"
Expand All @@ -24,3 +25,12 @@ log = "0.4"
reed-solomon-erasure = { version = "6.0", features = ["simd-accel"] }
ctrlc = { version = "3.4", features = ["termination"] }
get-size = { version = "0.1", features = ["derive"] }
linreg = "0.2"
statistical = "1.0"

[dev-dependencies]
criterion = "0.5"

[[bench]]
name = "rse_bench"
harness = false
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ Formal TLA+ specification of some protocols are provided in `tla+/`.
- **Async Rust**: Summerset is written in Rust and demonstrates canonical usage of async programming structures backed by the [`tokio`](https://tokio.rs/) framework;
- **Event-based**: Summerset adopts a channel-oriented, event-based system architecture; each replication protocol is basically just a set of event handlers plus a `tokio::select!` loop;
- **Modularized**: Common components of a distributed KV store, e.g. network transport and durable logger, are cleanly separated from each other and connected through channels.
- **Protocol-generic**: With the above two points combined, Summerset is able to support a set of different replication protocols in one codebase, each being just a single file, with common functionalities abstracted out.
- **Protocol-generic**: With the above two points combined, Summerset is able to support a set of different replication protocols in one codebase, with common functionalities abstracted out.

These design choices make protocol implementation in Summerset surprisingly straight-forward and **understandable**, without any sacrifice on performance. Comments / issues / PRs are always welcome!
These design choices make protocol implementation in Summerset straight-forward and understandable, without any sacrifice on performance. Comments / issues / PRs are always welcome!

</details>

Expand Down Expand Up @@ -172,8 +172,9 @@ Complete cluster management and benchmarking scripts are available in another re
- [x] fault recovery reads
- [x] follower gossiping
- [x] fall-back mechanism
- [ ] workload adaptiveness
- [ ] unbalanced assignment
- [x] workload adaptiveness
- [x] unbalanced assignment
- [ ] allow dynamic RS scheme over time
- [ ] TLA+ spec
- [x] client-side utilities
- [x] REPL-style client
Expand Down
86 changes: 86 additions & 0 deletions benches/rse_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//! Reed-Solomon erasure coding computation overhead benchmarking.

use std::fmt;
use std::collections::HashMap;
use std::time::Duration;

use summerset::{SummersetError, RSCodeword};

use rand::Rng;
use rand::distributions::Alphanumeric;

use reed_solomon_erasure::galois_8::ReedSolomon;

use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
};

use lazy_static::lazy_static;

static SCHEMES: [(u8, u8); 4] = [(3, 2), (6, 4), (9, 6), (12, 8)];
static SIZES: [usize; 6] = [
4096,
16 * 1024,
64 * 1024,
256 * 1024,
1024 * 1024,
4096 * 1024,
];

struct BenchId(pub usize, pub (u8, u8));

impl fmt::Display for BenchId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}@({},{})", self.0, self.1 .0, self.1 .1)
}
}

lazy_static!(
/// A very long pre-generated value string to get values from.
static ref MOM_VALUE: String = rand::thread_rng()
.sample_iter(&Alphanumeric)
.take(4096 * 1024)
.map(char::from)
.collect();

/// Reed-Solomon coder.
static ref RS_CODER: HashMap<(u8, u8), ReedSolomon> = SCHEMES
.iter()
.map(|&s| (s, ReedSolomon::new(s.0 as usize, s.1 as usize).unwrap()))
.collect();
);

fn compute_codeword(
size: usize,
scheme: (u8, u8),
) -> Result<(), SummersetError> {
let value = MOM_VALUE[..size].to_string();
let mut cw = RSCodeword::<String>::from_data(value, scheme.0, scheme.1)?;
cw.compute_parity(Some(RS_CODER.get(&scheme).unwrap()))?;
black_box(Ok(()))
}

fn rse_bench_group(c: &mut Criterion) {
let mut group = c.benchmark_group("rse_bench");
group
.sample_size(50)
.warm_up_time(Duration::from_millis(100))
.measurement_time(Duration::from_secs(6));

for size in SIZES {
for scheme in SCHEMES {
group.bench_with_input(
BenchmarkId::from_parameter(BenchId(size, scheme)),
&BenchId(size, scheme),
|b, bench_id| {
b.iter(|| compute_codeword(bench_id.0, bench_id.1));
},
);
}
}

group.finish();
}

criterion_group!(benches, rse_bench_group);
criterion_main!(benches);
171 changes: 171 additions & 0 deletions models/bench_rs_coding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import argparse
import subprocess
import multiprocessing

import matplotlib # type: ignore

matplotlib.use("Agg")

import matplotlib.pyplot as plt # type: ignore
import numpy as np # type: ignore


BENCH_GROUP_NAME = "rse_bench"


def printer_thread(proc, output_file):
with open(output_file, "w") as fout:
for line in iter(proc.stdout.readline, b""):
l = line.decode()
print(l, end="")
fout.write(l)


def run_criterion_group(output_dir):
cmd = ["cargo", "bench", "--", BENCH_GROUP_NAME]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

printer = multiprocessing.Process(
target=printer_thread, args=(proc, f"{output_dir}/rs_coding.out")
)
printer.start()

proc.wait()
printer.terminate()


def parse_bench_results(output_dir):
results = dict()
with open(f"{output_dir}/rs_coding.out", "r") as fout:
curr_round = None
for line in fout:
if line.startswith(f"{BENCH_GROUP_NAME}/"):
line = line.strip()
name = line[line.find("/") + 1 : line.find(")") + 1]
size = int(name[: name.find("@")])
d = int(name[name.find("(") + 1 : name.find(",")])
p = int(name[name.find(",") + 1 : name.find(")")])
curr_round = (size, (d, p))

if curr_round is not None and "time:" in line:
segs = line.split()
time = float(segs[-4])
unit = segs[-3]
if unit == "ms":
pass
elif unit == "ns":
time /= 1000000
else: # us
time /= 1000

results[curr_round] = time
curr_round = None

return results


def print_bench_results(results):
print("Results:")
for r, ms in results.items():
print(f" {r[0]:7d} ({r[1][0]:2d},{r[1][1]:2d}) {ms:6.3f} ms")


def plot_bench_results(results, output_dir):
matplotlib.rcParams.update(
{
"figure.figsize": (3.6, 1.6),
"font.size": 10,
}
)
fig = plt.figure("Bench")

xs, ys = [], []
for r in results:
if r[0] not in xs:
xs.append(r[0])
if r[1] not in ys:
ys.append(r[1])
xs.sort()
ys.sort(reverse=True)

data = [[0.0 for _ in xs] for _ in ys]
vmin, vmax = float("inf"), 0.0
for r, ms in results.items():
xi, yi = xs.index(r[0]), ys.index(r[1])
data[yi][xi] = ms
if ms > vmax:
vmax = ms
if ms < vmin:
vmin = ms

cmap = plt.get_cmap("Reds")
colors = cmap(np.linspace(1.0 - (vmax - vmin) / float(vmax), 0.6, cmap.N))
new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("Reds", colors)

plt.imshow(data, cmap=new_cmap, aspect="equal", norm="log")
plt.colorbar(
aspect=12,
shrink=0.85,
ticks=[vmin, 1, 10],
format="{x:.0f}ms",
)

def readable_size(size):
if size >= 1024 * 1024:
return f"{size // (1024*1024)}M"
elif size >= 1024:
return f"{size // 1024}K"
else:
return size

def readable_time(ms):
if ms < 0.1:
return f"{ms*1000:.0f}μs"
elif ms < 1.0:
return f".{ms*10:.0f}ms"
else:
return f"{ms:.0f}ms"

for r, ms in results.items():
xi, yi = xs.index(r[0]), ys.index(r[1])
plt.text(
xi,
yi,
readable_time(ms),
horizontalalignment="center",
verticalalignment="center",
color="black",
fontsize=8,
)

xticks = [readable_size(x) for x in xs]
plt.xticks(list(range(len(xticks))), xticks)

yticks = [f"({d+p},{d})" for d, p in ys]
plt.yticks(list(range(len(yticks))), yticks)

plt.tight_layout()

pdf_name = f"{output_dir}/rs_coding.pdf"
plt.savefig(pdf_name, bbox_inches=0)
plt.close()
print(f"Plotted: {pdf_name}")


if __name__ == "__main__":
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument(
"-p", "--plot", action="store_true", help="if set, do the plotting phase"
)
parser.add_argument(
"-o", "--output_dir", type=str, default="./results", help="output folder"
)
args = parser.parse_args()

if not args.plot:
run_criterion_group(args.output_dir)

else:
results = parse_bench_results(args.output_dir)
print_bench_results(results)
plot_bench_results(results, args.output_dir)
8 changes: 5 additions & 3 deletions models/plot_cstr_bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def plot_cstr_bound(idx, cluster_size):
linewidth=1,
color="dimgray",
length_includes_head=True,
head_width=0.3,
head_width=0.2,
head_length=0.25,
overhang=0.5,
clip_on=False,
label="Tradeoff decisions",
Expand All @@ -123,7 +124,8 @@ def plot_cstr_bound(idx, cluster_size):
linewidth=1,
color="dimgray",
length_includes_head=True,
head_width=0.3,
head_width=0.2,
head_length=0.25,
overhang=0.5,
clip_on=False,
)
Expand Down Expand Up @@ -247,7 +249,7 @@ def plot_all_cstr_bounds(output_dir):
make_legend(fig, handles, labels)

plt.tight_layout(pad=1.0)
plt.savefig(f"{output_dir}/cstr_bounds.png", dpi=300)
plt.savefig(f"{output_dir}/cstr_bounds.pdf", bbox_inches=0)


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions models/prob_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ def plot_all_env_results(results, output_dir):
fig.subplots_adjust(bottom=0.16, top=0.9, left=0.23, right=0.75)

plt.savefig(
f"{output_dir}/calc.envs.r_{CLUSTER}.png",
dpi=300,
f"{output_dir}/calc.envs.r_{CLUSTER}.pdf",
bbox_inches=0,
)
plt.close()

Expand Down
File renamed without changes.
Loading

0 comments on commit eab0934

Please sign in to comment.