argumentcomputer · samuelburnham · Feb 20, 2024 · Feb 22, 2024 · Feb 27, 2024 · Feb 28, 2024
diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml
@@ -3,7 +3,20 @@
 # - `gh-pages` branch with Pages deployment set up
 # - Ideally some HTML to link to the reports, e.g. https://lurk-lab.github.io/ci-lab/
 # - Self-hosted runner attached to the caller repo with `gpu-bench` and `gh-pages` tags
-# - `justfile` with a `gpu-bench-ci` recipe that outputs `<bench-name>-<short-sha>.json`
+# - `justfile` with a `gpu-bench-ci` recipe that outputs `<bench-name>-<short-commit-hash>.json`
+#
+# The core file structure is a `benchmarks/history` directory on the `gh-pages` branch that contains:
+# - Historical data `.tar.gz` archives, one for each commit or workflow run, which contain the Criterion benchmark results
+#   and `Cargo.lock` for the given commit.
+# - Historical data in `plot-data.json`, which contains only the relevant metadata and average benchmark result for each of
+#   the saved benchmarks. This file is persistent and append-only; if it is not found, it is re-created from each of
+#   the `.tar.gz` archives.
+# - `.png` plot images, created on each run using `plot-data.json`.
+# - HTML to render the images.
+#
+# This structure is all created/deployed by the workflow after running the benchmarks,
+# with the only prerequisite being an existing `gh-pages` branch deployed via GitHub Pages.
+# See https://github.com/lurk-lab/ci-lab/tree/gh-pages as an example of the deployed plots
 name: Deploy GPU benchmark from default branch
 
 on:
@@ -14,6 +27,10 @@ on:
         required: false
         default: 'LURK'
         type: string
+      # List of prerequisite Ubuntu packages, separated by whitespace
+      packages:
+        required: false
+        type: string
 
 jobs:
   benchmark:
@@ -27,6 +44,9 @@ jobs:
         with:
           gpu-framework: 'cuda'
       - uses: ./.github/actions/ci-env
+      - uses: ./.github/actions/install-deps
+        with:
+          packages: "${{ inputs.packages }} pkg-config libfontconfig1-dev"
       # Install deps
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
@@ -54,28 +74,37 @@ jobs:
       - name: Run benchmarks
         run: |
           just gpu-bench-ci fibonacci
-          mv fibonacci-${{ env.COMMIT }}.json ..
+          mv fibonacci-${{ env.COMMIT }}.json ${{ github.workspace }}
         working-directory: ${{ github.workspace }}/benches
+      - uses: actions/checkout@v4
+        with:
+          repository: lurk-lab/ci-workflows
+          ref: bench-deploy
+          path: ci-workflows
       # If no plot data found, unzip all historical bench results to re-create the plots
       - name: Check for existing plot data
         run: |
-          if [ ! -f "plot-data.json" ]; then
-            shopt -s nullglob # Make glob patterns that match no files expand to a null string
-            tarballs=(./*.tar.gz)
+          shopt -s nullglob # Make glob patterns that match no files expand to a null string
+          if [ ! -f "${{ github.workspace }}/gh-pages/benchmarks/history/plot-data.json" ]; then
+            echo "No plot data found"
+            tarballs=(${{ github.workspace }}/gh-pages/benchmarks/history/*.tar.gz)
             if (( ${#tarballs[@]} )); then
               cat "${tarballs[@]}" | tar -xvzf - -i
             else
               echo "No tarballs found for extraction."
             fi
-            shopt -u nullglob # Disable nullglob option
           fi
-      # TODO: This should probably be in a subdirectory or Cargo workspace
+          shopt -u nullglob # Disable nullglob option
+        working-directory: ${{ github.workspace }}/ci-workflows
       # Saves the plot data to be deployed
       - name: Generate historical performance plot
         run: |
+          cp ${{ github.workspace }}/fibonacci-${{ env.COMMIT }}.json .
           cargo run
-          mkdir -p history
-          mv -f plot-data.json history
+          mv *.png ${{ github.workspace }}
+          mkdir -p ${{ github.workspace }}/history
+          mv -f plot-data.json ${{ github.workspace }}/history
+        working-directory: ${{ github.workspace }}/ci-workflows
       # TODO: Prettify labels for easier viewing
       # Compress the benchmark file and metadata for later analysis
       - name: Compress artifacts

diff --git a/Cargo.toml b/Cargo.toml
@@ -7,8 +7,10 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0"
+camino = "1.1.6"
 # chrono version is pinned to be compatible with plotters `build_cartesian_2d` API
 chrono = { version = "=0.4.20", features = ["clock", "serde"] }
+clap = { version = "4.5.1", features = ["derive"] }
 plotters = "0.3.5"
 serde = { version = "1.0.195", features = ["derive"] }
 serde_json = "1.0.111"

diff --git a/src/json.rs b/src/json.rs
@@ -2,7 +2,9 @@ use core::fmt;
 use std::io::Read;
 use std::{fs::File, path::Path};
 
-use serde::Deserialize;
+use anyhow::{anyhow, bail};
+use chrono::{DateTime, Utc};
+use serde::{de, Deserialize};
 use serde_json::de::{StrRead, StreamDeserializer};
 use serde_json::{Deserializer, Error, Value};
 
@@ -17,7 +19,7 @@ pub struct BenchData {
 pub struct BenchId {
     pub group_name: String,
     pub bench_name: String,
-    pub params: String,
+    pub params: BenchParams,
 }
 
 // Assumes three `String` elements in a Criterion bench ID: <group>/<name>/<params>
@@ -31,34 +33,72 @@ impl<'de> Deserialize<'de> for BenchId {
         let s = String::deserialize(deserializer)?;
         let id = s.split('/').collect::<Vec<&str>>();
         if id.len() != 3 {
-            Err(serde::de::Error::custom("Expected 3 bench ID elements"))
+            Err(de::Error::custom("Expected 3 bench ID elements"))
         } else {
-            let bench_name = id[1].replace('_', ":");
             Ok(BenchId {
                 group_name: id[0].to_owned(),
-                // Criterion converts `:` to `_` in the timestamp as the former is valid JSON syntax,
-                // so we convert `_` back to `:` when deserializing
-                bench_name,
-                params: id[2].to_owned(),
+                bench_name: id[1].to_owned(),
+                params: BenchParams::try_from(id[2])
+                    .map_err(|e| de::Error::custom(format!("{}", e)))?,
             })
         }
     }
 }
 
+#[derive(Debug, PartialEq)]
+pub struct BenchParams {
+    pub commit_hash: String, // leading `<commit-hash>` segment of the input, e.g. `dd2a8e6`
+    pub commit_timestamp: DateTime<Utc>, // commit date; converted to UTC when parsed from `&str`
+    pub params: String, // everything after the timestamp, e.g. `rc=100`
+}
+
+impl TryFrom<&str> for BenchParams {
+    type Error = anyhow::Error;
+    // Splits a `<commit-hash>-<commit-date>-<params>` input into its three fields. Malformed
+    // input yields an error rather than a panic. UTC offsets other than `-hh:mm` are not handled.
+    // E.g. `dd2a8e6-2024-02-20T22:48:21-05:00-rc=100` becomes ("dd2a8e6", `<DateTime>`, "rc=100")
+    fn try_from(value: &str) -> anyhow::Result<Self> {
+        let (commit_hash, rest) = value
+            .split_once('-')
+            .ok_or_else(|| anyhow!("Invalid format for bench params"))?;
+        let arr: Vec<&str> = rest.split_inclusive('-').collect();
+        // The timestamp spans the first four `-`-terminated chunks, so reject shorter input
+        // up front: slicing `arr[..4]` would otherwise panic instead of returning an error
+        if arr.len() < 4 {
+            bail!("Invalid format for bench params");
+        }
+        // Criterion converts `:` to `_` in the timestamp as the former is valid JSON syntax,
+        // so we convert `_` back to `:` when deserializing
+        let mut date = arr[..4].concat().replace('_', ":");
+        date.pop(); // drop the trailing `-` that separates the timestamp from the params
+        let params = arr[4..].concat();
+        let commit_timestamp = DateTime::parse_from_rfc3339(&date)
+            .map_err(|e| anyhow!("Failed to parse string into `DateTime`: {}", e))?
+            .with_timezone(&Utc);
+        Ok(Self {
+            commit_hash: commit_hash.to_owned(),
+            commit_timestamp,
+            params,
+        })
+    }
+}
+
 #[derive(Debug, Deserialize)]
 pub struct BenchResult {
     #[serde(rename = "estimate")]
     pub time: f64,
 }
 
 // Deserializes the benchmark JSON file into structured data for plotting
-pub fn read_json_from_file<P: AsRef<Path>>(path: P) -> Result<Vec<BenchData>, Error> {
+pub fn read_json_from_file<P: AsRef<Path>, T: for<'de> Deserialize<'de>>(
+    path: P,
+) -> Result<Vec<T>, Error> {
     let mut file = File::open(path).unwrap();
     let mut s = String::new();
     file.read_to_string(&mut s).unwrap();
 
     let mut data = vec![];
-    for result in ResilientStreamDeserializer::<BenchData>::new(&s).flatten() {
+    for result in ResilientStreamDeserializer::<T>::new(&s).flatten() {
         data.push(result);
     }
     Ok(data)
@@ -145,3 +185,23 @@ where
         }
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::json::BenchParams;
+    use chrono::{DateTime, Utc};
+
+    #[test]
+    fn parse_bench_params() {
+        let s = "dd2a8e6-2024-02-20T22:48:21-05:00-rc=100"; // <commit-hash>-<commit-date>-<params>
+        let params = BenchParams::try_from(s).unwrap();
+        let params_expected = BenchParams {
+            commit_hash: "dd2a8e6".into(),
+            commit_timestamp: DateTime::parse_from_rfc3339("2024-02-20T22:48:21-05:00")
+                .map(|dt| dt.with_timezone(&Utc)) // `try_from` stores the timestamp in UTC
+                .unwrap(),
+            params: "rc=100".into(),
+        };
+        assert_eq!(params, params_expected);
+    }
+}