WIP: Demo scaling benchmark sweep setup

Part of #10. Mostly done now, to make sure the performance JSON data is usable. Will be refined over time.
RSE-Sheffield · Oct 31, 2024 · 346d0bb · 346d0bb
1 parent 7f58c84
commit 346d0bb
Show file tree

Hide file tree

Showing 4 changed files with 96 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -507,6 +507,7 @@ venv**/
 .venv**/
 *.png
 *.csv
+*.sh
 
 # vscode configuration files (debugging etc)
 .vscode/
diff --git a/data/scaling/bench.sh b/data/scaling/bench.sh
@@ -0,0 +1,21 @@
+#! /usr/bin/env bash
+# Demo bash script showing how a scaling benchmark might be carried out.
+
+# change to this directory
+SCRIPT_DIR=$(realpath $(dirname "$0"))
+cd "$(dirname "$0")"
+echo ${SCRIPT_DIR}
+
+BINARY=$(realpath ../../build/bin/Release/exatepp_abm)
+PARAMS=$(realpath params.csv)
+OUTPUT_DIR="outputs"
+COUNT=$(($(wc -l <"$PARAMS") - 1))
+
+mkdir -p ${OUTPUT_DIR}
+
+for ((i=0;i<COUNT;i++)); do
+    mkdir -p ${OUTPUT_DIR}/${i}
+    echo "run ${i} / ${COUNT}"
+    echo "  ${BINARY} -i ${PARAMS} -n ${i} -o \"${OUTPUT_DIR}/${i}\""
+    ${BINARY} -i ${PARAMS} -n ${i} -o "${OUTPUT_DIR}/${i}"
+done
diff --git a/data/scaling/params.csv b/data/scaling/params.csv
@@ -0,0 +1,9 @@
+rng_seed,param_id,duration,n_total,population_0_9,population_10_19,population_20_29,population_30_39,population_40_49,population_50_59,population_60_69,population_70_79,population_80,n_seed_infection
+0,0,365,1024,1,1,1,1,1,1,1,1,1,1
+0,1,365,2048,1,1,1,1,1,1,1,1,1,1
+0,2,365,4096,1,1,1,1,1,1,1,1,1,1
+0,3,365,8192,1,1,1,1,1,1,1,1,1,1
+0,4,365,16384,1,1,1,1,1,1,1,1,1,1
+0,5,365,32768,1,1,1,1,1,1,1,1,1,1
+0,6,365,65536,1,1,1,1,1,1,1,1,1,1
+0,7,365,131072,1,1,1,1,1,1,1,1,1,1
diff --git a/tools/plot-performance.py b/tools/plot-performance.py
@@ -0,0 +1,65 @@
+#! /usr/bin/env python3 
+import argparse
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pathlib
+import json
+
+
+def read_performance_json(inputs):
+    PERF_FILENAME="performance.json"
+    performance_data = []
+    for inp in inputs:
+        inp = pathlib.Path(inp)
+        if inp.is_file() and inp.name == PERF_FILENAME:
+            performance_data.append(json.load(inp))
+        elif inp.is_dir():
+            for file in pathlib.Path(inp).rglob("performance.json"):
+                with open(file, 'r') as f:
+                    performance_data.append(json.load(f))
+    df = pd.DataFrame.from_dict(performance_data)
+    return df
+
+def main():
+    parser = argparse.ArgumentParser(description="Plotting script for runtime/performance data")
+    parser.add_argument("inputs", type=pathlib.Path, nargs="+", help="Json files to plot")
+    parser.add_argument("-o", "--output", type=pathlib.Path, help="Path to output image location")
+    args = parser.parse_args()
+    print(args)
+
+    df = read_performance_json(args.inputs)
+
+    sns.set_palette("Dark2")
+    sns.set_context("talk")
+    sns.set_style("darkgrid")
+
+    fig, axes = plt.subplots(1, 2, figsize=(16, 9), sharex=True)
+
+    g0 = sns.lineplot(df, ax=axes[0], x="n_total",y="totalProgram", style="device_name")
+    axes[0].set_title("Total Runtime (s) vs population")
+    axes[0].set_xlim(left=0)
+    axes[0].set_ylim(bottom=0)
+
+
+    # Copy some columns from the dataframe
+    dfm =df[["device_name", "n_total", "configParsing", "simulate", "preSimulate", "postSimulate", "flamegpuSimulateElapsed"]].copy()
+    # Drop some columns pre-melt
+    dfm = dfm.melt(id_vars=["device_name", "n_total"], var_name="metric", value_name = "count")
+    print(dfm)
+
+
+    # for y in y_cols:
+    g1 = sns.lineplot(dfm, ax=axes[1], x="n_total", y="count", hue="metric", style="device_name")
+    axes[1].set_title("Split timing information")
+    axes[1].set_xlim(left=0)
+    axes[1].set_ylim(bottom=0)
+
+    if (args.output):
+        plt.savefig(args.output)
+    else:
+        plt.show()
+
+
+if __name__ == "__main__":
+    main()