Merge pull request #13 from josehu07/crossword

Cumulated updates to benchmarking & related features
josehu07 · Nov 12, 2023 · eab0934 · eab0934
2 parents 0e6afca + 6775db1
commit eab0934
Show file tree

Hide file tree

Showing 96 changed files with 19,391 additions and 14,660 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -10,7 +10,8 @@ authors = ["Guanzhou Jose Hu <huguanzhou123@gmail.com>"]
 
 [dependencies]
 async-trait = "0.1"
-fixedbitset = "0.4"
+fixedbitset = { version = "0.4", features = ["serde"] }
+rangemap = "1.4"
 flashmap = "0.1"
 bytes = { version = "1.4", features = ["serde"] }
 futures = "0.3"
@@ -24,3 +25,12 @@ log = "0.4"
 reed-solomon-erasure = { version = "6.0", features = ["simd-accel"] }
 ctrlc = { version = "3.4", features = ["termination"] }
 get-size = { version = "0.1", features = ["derive"] }
+linreg = "0.2"
+statistical = "1.0"
+
+[dev-dependencies]
+criterion = "0.5"
+
+[[bench]]
+name = "rse_bench"
+harness = false
diff --git a/README.md b/README.md
@@ -71,9 +71,9 @@ Formal TLA+ specification of some protocols are provided in `tla+/`.
 - **Async Rust**: Summerset is written in Rust and demonstrates canonical usage of async programming structures backed by the [`tokio`](https://tokio.rs/) framework;
 - **Event-based**: Summerset adopts a channel-oriented, event-based system architecture; each replication protocol is basically just a set of event handlers plus a `tokio::select!` loop;
 - **Modularized**: Common components of a distributed KV store, e.g. network transport and durable logger, are cleanly separated from each other and connected through channels.
-- **Protocol-generic**: With the above two points combined, Summerset is able to support a set of different replication protocols in one codebase, each being just a single file, with common functionalities abstracted out.
+- **Protocol-generic**: With the above two points combined, Summerset is able to support a set of different replication protocols in one codebase, with common functionalities abstracted out.
 
-These design choices make protocol implementation in Summerset surprisingly straight-forward and **understandable**, without any sacrifice on performance. Comments / issues / PRs are always welcome!
+These design choices make protocol implementation in Summerset straight-forward and understandable, without any sacrifice on performance. Comments / issues / PRs are always welcome!
 
 </details>
 
@@ -172,8 +172,9 @@ Complete cluster management and benchmarking scripts are available in another re
   - [x] fault recovery reads
   - [x] follower gossiping
   - [x] fall-back mechanism
-  - [ ] workload adaptiveness
-  - [ ] unbalanced assignment
+  - [x] workload adaptiveness
+  - [x] unbalanced assignment
+  - [ ] allow dynamic RS scheme over time
   - [ ] TLA+ spec
 - [x] client-side utilities
   - [x] REPL-style client

diff --git a/benches/rse_bench.rs b/benches/rse_bench.rs
@@ -0,0 +1,86 @@
+//! Reed-Solomon erasure coding computation overhead benchmarking.
+
+use std::fmt;
+use std::collections::HashMap;
+use std::time::Duration;
+
+use summerset::{SummersetError, RSCodeword};
+
+use rand::Rng;
+use rand::distributions::Alphanumeric;
+
+use reed_solomon_erasure::galois_8::ReedSolomon;
+
+use criterion::{
+    black_box, criterion_group, criterion_main, BenchmarkId, Criterion,
+};
+
+use lazy_static::lazy_static;
+
+static SCHEMES: [(u8, u8); 4] = [(3, 2), (6, 4), (9, 6), (12, 8)];
+static SIZES: [usize; 6] = [
+    4096,
+    16 * 1024,
+    64 * 1024,
+    256 * 1024,
+    1024 * 1024,
+    4096 * 1024,
+];
+
+struct BenchId(pub usize, pub (u8, u8));
+
+impl fmt::Display for BenchId {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}@({},{})", self.0, self.1 .0, self.1 .1)
+    }
+}
+
+lazy_static!(
+    /// A very long pre-generated value string to get values from.
+    static ref MOM_VALUE: String = rand::thread_rng()
+        .sample_iter(&Alphanumeric)
+        .take(4096 * 1024)
+        .map(char::from)
+        .collect();
+
+    /// Reed-Solomon coder.
+    static ref RS_CODER: HashMap<(u8, u8), ReedSolomon> = SCHEMES
+        .iter()
+        .map(|&s| (s, ReedSolomon::new(s.0 as usize, s.1 as usize).unwrap()))
+        .collect();
+);
+
+fn compute_codeword(
+    size: usize,
+    scheme: (u8, u8),
+) -> Result<(), SummersetError> {
+    let value = MOM_VALUE[..size].to_string();
+    let mut cw = RSCodeword::<String>::from_data(value, scheme.0, scheme.1)?;
+    cw.compute_parity(Some(RS_CODER.get(&scheme).unwrap()))?;
+    black_box(Ok(()))
+}
+
+fn rse_bench_group(c: &mut Criterion) {
+    let mut group = c.benchmark_group("rse_bench");
+    group
+        .sample_size(50)
+        .warm_up_time(Duration::from_millis(100))
+        .measurement_time(Duration::from_secs(6));
+
+    for size in SIZES {
+        for scheme in SCHEMES {
+            group.bench_with_input(
+                BenchmarkId::from_parameter(BenchId(size, scheme)),
+                &BenchId(size, scheme),
+                |b, bench_id| {
+                    b.iter(|| compute_codeword(bench_id.0, bench_id.1));
+                },
+            );
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, rse_bench_group);
+criterion_main!(benches);
diff --git a/models/bench_rs_coding.py b/models/bench_rs_coding.py
@@ -0,0 +1,171 @@
+import argparse
+import subprocess
+import multiprocessing
+
+import matplotlib  # type: ignore
+
+matplotlib.use("Agg")
+
+import matplotlib.pyplot as plt  # type: ignore
+import numpy as np  # type: ignore
+
+
+BENCH_GROUP_NAME = "rse_bench"
+
+
+def printer_thread(proc, output_file):
+    with open(output_file, "w") as fout:
+        for line in iter(proc.stdout.readline, b""):
+            l = line.decode()
+            print(l, end="")
+            fout.write(l)
+
+
+def run_criterion_group(output_dir):
+    cmd = ["cargo", "bench", "--", BENCH_GROUP_NAME]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+
+    printer = multiprocessing.Process(
+        target=printer_thread, args=(proc, f"{output_dir}/rs_coding.out")
+    )
+    printer.start()
+
+    proc.wait()
+    printer.terminate()
+
+
+def parse_bench_results(output_dir):
+    results = dict()
+    with open(f"{output_dir}/rs_coding.out", "r") as fout:
+        curr_round = None
+        for line in fout:
+            if line.startswith(f"{BENCH_GROUP_NAME}/"):
+                line = line.strip()
+                name = line[line.find("/") + 1 : line.find(")") + 1]
+                size = int(name[: name.find("@")])
+                d = int(name[name.find("(") + 1 : name.find(",")])
+                p = int(name[name.find(",") + 1 : name.find(")")])
+                curr_round = (size, (d, p))
+
+            if curr_round is not None and "time:" in line:
+                segs = line.split()
+                time = float(segs[-4])
+                unit = segs[-3]
+                if unit == "ms":
+                    pass
+                elif unit == "ns":
+                    time /= 1000000
+                else:  # us
+                    time /= 1000
+
+                results[curr_round] = time
+                curr_round = None
+
+    return results
+
+
+def print_bench_results(results):
+    print("Results:")
+    for r, ms in results.items():
+        print(f"  {r[0]:7d} ({r[1][0]:2d},{r[1][1]:2d})  {ms:6.3f} ms")
+
+
+def plot_bench_results(results, output_dir):
+    matplotlib.rcParams.update(
+        {
+            "figure.figsize": (3.6, 1.6),
+            "font.size": 10,
+        }
+    )
+    fig = plt.figure("Bench")
+
+    xs, ys = [], []
+    for r in results:
+        if r[0] not in xs:
+            xs.append(r[0])
+        if r[1] not in ys:
+            ys.append(r[1])
+    xs.sort()
+    ys.sort(reverse=True)
+
+    data = [[0.0 for _ in xs] for _ in ys]
+    vmin, vmax = float("inf"), 0.0
+    for r, ms in results.items():
+        xi, yi = xs.index(r[0]), ys.index(r[1])
+        data[yi][xi] = ms
+        if ms > vmax:
+            vmax = ms
+        if ms < vmin:
+            vmin = ms
+
+    cmap = plt.get_cmap("Reds")
+    colors = cmap(np.linspace(1.0 - (vmax - vmin) / float(vmax), 0.6, cmap.N))
+    new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("Reds", colors)
+
+    plt.imshow(data, cmap=new_cmap, aspect="equal", norm="log")
+    plt.colorbar(
+        aspect=12,
+        shrink=0.85,
+        ticks=[vmin, 1, 10],
+        format="{x:.0f}ms",
+    )
+
+    def readable_size(size):
+        if size >= 1024 * 1024:
+            return f"{size // (1024*1024)}M"
+        elif size >= 1024:
+            return f"{size // 1024}K"
+        else:
+            return size
+
+    def readable_time(ms):
+        if ms < 0.1:
+            return f"{ms*1000:.0f}μs"
+        elif ms < 1.0:
+            return f".{ms*10:.0f}ms"
+        else:
+            return f"{ms:.0f}ms"
+
+    for r, ms in results.items():
+        xi, yi = xs.index(r[0]), ys.index(r[1])
+        plt.text(
+            xi,
+            yi,
+            readable_time(ms),
+            horizontalalignment="center",
+            verticalalignment="center",
+            color="black",
+            fontsize=8,
+        )
+
+    xticks = [readable_size(x) for x in xs]
+    plt.xticks(list(range(len(xticks))), xticks)
+
+    yticks = [f"({d+p},{d})" for d, p in ys]
+    plt.yticks(list(range(len(yticks))), yticks)
+
+    plt.tight_layout()
+
+    pdf_name = f"{output_dir}/rs_coding.pdf"
+    plt.savefig(pdf_name, bbox_inches=0)
+    plt.close()
+    print(f"Plotted: {pdf_name}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(allow_abbrev=False)
+    parser.add_argument(
+        "-p", "--plot", action="store_true", help="if set, do the plotting phase"
+    )
+    parser.add_argument(
+        "-o", "--output_dir", type=str, default="./results", help="output folder"
+    )
+    args = parser.parse_args()
+
+    if not args.plot:
+        run_criterion_group(args.output_dir)
+
+    else:
+        results = parse_bench_results(args.output_dir)
+        print_bench_results(results)
+        plot_bench_results(results, args.output_dir)
diff --git a/models/plot_cstr_bounds.py b/models/plot_cstr_bounds.py
@@ -102,7 +102,8 @@ def plot_cstr_bound(idx, cluster_size):
         linewidth=1,
         color="dimgray",
         length_includes_head=True,
-        head_width=0.3,
+        head_width=0.2,
+        head_length=0.25,
         overhang=0.5,
         clip_on=False,
         label="Tradeoff decisions",
@@ -123,7 +124,8 @@ def plot_cstr_bound(idx, cluster_size):
         linewidth=1,
         color="dimgray",
         length_includes_head=True,
-        head_width=0.3,
+        head_width=0.2,
+        head_length=0.25,
         overhang=0.5,
         clip_on=False,
     )
@@ -247,7 +249,7 @@ def plot_all_cstr_bounds(output_dir):
     make_legend(fig, handles, labels)
 
     plt.tight_layout(pad=1.0)
-    plt.savefig(f"{output_dir}/cstr_bounds.png", dpi=300)
+    plt.savefig(f"{output_dir}/cstr_bounds.pdf", bbox_inches=0)
 
 
 if __name__ == "__main__":

diff --git a/models/prob_calculation.py b/models/prob_calculation.py
@@ -242,8 +242,8 @@ def plot_all_env_results(results, output_dir):
     fig.subplots_adjust(bottom=0.16, top=0.9, left=0.23, right=0.75)
 
     plt.savefig(
-        f"{output_dir}/calc.envs.r_{CLUSTER}.png",
-        dpi=300,
+        f"{output_dir}/calc.envs.r_{CLUSTER}.pdf",
+        bbox_inches=0,
     )
     plt.close()
 

diff --git a/models/prob_tc_match.py → models/test_tc_match.py b/models/prob_tc_match.py → models/test_tc_match.py