From 830788c8c55f4481a99de5eaa64d146b54052a39 Mon Sep 17 00:00:00 2001
From: Agost Biro <agostbiro@gmail.com>
Date: Thu, 14 Mar 2024 19:41:41 +0100
Subject: [PATCH] ci: add performance benchmarks

---
 .github/workflows/edr-benchmark.yml    |  86 ++++++++++++++++++
 .gitignore                             |   3 +
 crates/tools/js/benchmark/README.md    |  10 +--
 crates/tools/js/benchmark/index.js     | 119 +++++++++++++++++++++----
 crates/tools/js/benchmark/package.json |   8 +-
 crates/tools/scenarios/snapshot.json   |   1 +
 pnpm-lock.yaml                         |   3 +
 7 files changed, 203 insertions(+), 27 deletions(-)
 create mode 100644 .github/workflows/edr-benchmark.yml
 create mode 100644 crates/tools/scenarios/snapshot.json

diff --git a/.github/workflows/edr-benchmark.yml b/.github/workflows/edr-benchmark.yml
new file mode 100644
index 00000000000..288814a9980
--- /dev/null
+++ b/.github/workflows/edr-benchmark.yml
@@ -0,0 +1,86 @@
+name: EDR Benchmark
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - ".github/workflows/edr-benchmark.yml"
+      - "rust-toolchain"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+  pull_request:
+    branches:
+      - "**"
+    paths:
+      - ".github/workflows/edr-benchmark.yml"
+      - "rust-toolchain"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+  workflow_dispatch:
+
+defaults:
+  run:
+    working-directory: crates/tools/js/benchmark
+
+concurrency:
+  group: ${{github.workflow}}-${{github.ref}}
+  cancel-in-progress: true
+
+jobs:
+  js-benchmark:
+    name: Run JS scenario runner benchmark
+    environment: github-action-benchmark
+    runs-on: self-hosted
+    # Only run for trusted collaborators since third-parties could run malicious code on the self-hosted benchmark runner.
+    if: github.event_name != 'pull_request' || github.event.pull_request.author_association == 'OWNER' || github.event.pull_request.author_association == 'MEMBER' || github.event.pull_request.author_association == 'COLLABORATOR'
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: pnpm/action-setup@v2
+        with:
+          version: 8
+      - name: Install Node
+        uses: actions/setup-node@v2
+        with:
+          node-version: 20
+          cache: pnpm
+
+      - name: Install Rust (stable)
+        uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          override: true
+
+      - name: Install package
+        run: pnpm install --frozen-lockfile --prefer-offline
+
+      - name: Run benchmark
+        run: pnpm run -s benchmark
+
+      - name: Validate regressions
+        run: pnpm run -s verify
+
+      - name: Generate report for github-action-benchmark
+        run: pnpm run -s report > report.json && cat report.json
+
+      - name: Store benchmark result
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          tool: customSmallerIsBetter
+          output-file-path: crates/tools/js/benchmark/report.json
+          gh-repository: github.com/NomicFoundation/edr-benchmark-results
+          gh-pages-branch: main
+          benchmark-data-dir-path: bench
+          github-token: ${{ secrets.BENCHMARK_GITHUB_TOKEN }}
+          # Only save the data for main branch pushes. For PRs we only compare
+          auto-push: ${{ github.ref == 'refs/heads/main' && github.event_name != 'pull_request' }}
+          # TODO calibrate
+          alert-threshold: "105%"
+          # Only fail on pull requests, don't break CI in main
+          fail-on-alert: ${{ github.event_name == 'pull_request' }}
+          # Enable Job Summary for PRs
+          summary-always: true
+          max-items-in-chart: 1000
diff --git a/.gitignore b/.gitignore
index 947c59ccea4..0119d96c202 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,3 +114,6 @@ Brewfile.lock.json
 
 # Ipython Notebook
 .ipynb_checkpoints
+
+# Benchmark
+benchmark-output.json
diff --git a/crates/tools/js/benchmark/README.md b/crates/tools/js/benchmark/README.md
index 57f7311e942..27316d1088b 100644
--- a/crates/tools/js/benchmark/README.md
+++ b/crates/tools/js/benchmark/README.md
@@ -9,12 +9,6 @@ pnpm install
 pnpm run benchmark
 ```
 
-The measurements will be printed to stdout as machine-readable json and to stderr as human-readable output.
+The measurements will be saved to disk as machine-readable JSON in `./benchmark-output.json`, and a human-readable summary will be printed to stderr.
 
-## Grep
-
-It's possible to grep the output to run a specific scenario:
-
-```shell
-npm run benchmark -- --grep seaport
-```
+Please see `pnpm run help` for more.
diff --git a/crates/tools/js/benchmark/index.js b/crates/tools/js/benchmark/index.js
index 2036736e002..37f85ce63dc 100644
--- a/crates/tools/js/benchmark/index.js
+++ b/crates/tools/js/benchmark/index.js
@@ -3,6 +3,7 @@ const fs = require("fs");
 const readline = require("readline");
 const zlib = require("zlib");
 
+const { ArgumentParser } = require("argparse");
 const { _ } = require("lodash");
 
 const {
@@ -10,28 +11,108 @@ const {
 } = require("hardhat/internal/hardhat-network/provider/provider");
 
 const SCENARIOS_DIR = "../../scenarios/";
+const SCENARIO_SNAPSHOT_NAME = "snapshot.json";
 
-function usage() {
-  console.error("Usage: node index.js [--grep|-g <pattern>]");
-  process.exit(1);
+async function main() {
+  const parser = new ArgumentParser({
+    description: "Scenario benchmark runner",
+  });
+  parser.add_argument("command", {
+    choices: ["benchmark", "verify", "report"],
+    help: "Whether to run a benchmark, verify that there are no regressions or create a report for `github-action-benchmark`",
+  });
+  parser.add_argument("-g", "--grep", {
+    type: "str",
+    help: "Only execute the scenarios that contain the given string",
+  });
+  parser.add_argument("-o", "--benchmark-output", {
+    type: "str",
+    default: "./benchmark-output.json",
+    help: "Where to save the benchmark output file",
+  });
+  const args = parser.parse_args();
+
+  if (args.command === "benchmark") {
+    await benchmarkAllScenarios(args.benchmark_output, args.grep);
+    process.exit(0);
+  } else if (args.command === "verify") {
+    const success = await verify(args.benchmark_output);
+    process.exit(success ? 0 : 1);
+  } else if (args.command === "report") {
+    await report(args.benchmark_output);
+    process.exit(0);
+  }
 }
 
-async function main() {
-  const numArgs = process.argv.length;
+// Print the benchmark output at `benchmarkResultPath` to stdout as a JSON array
+// of `{ name, unit, value }` entries: one per scenario plus an "All Scenarios"
+// entry holding the summed time, all in milliseconds.
+async function report(benchmarkResultPath) {
+  // Resolve against the cwd, which is where `fs.writeFileSync` put the file
+  const benchmarkResult = require(path.resolve(benchmarkResultPath));
 
-  if (numArgs !== 2 && numArgs !== 4) {
-    usage();
+  let totalTime = 0;
+  const report = [];
+  for (const scenarioName in benchmarkResult) {
+    const { timeMs } = benchmarkResult[scenarioName];
+    report.push({ name: scenarioName, unit: "ms", value: timeMs });
+    totalTime += timeMs;
   }
+  report.push({
+    name: "All Scenarios",
+    unit: "ms",
+    value: totalTime,
+  });
 
-  let grep = undefined;
-  if (numArgs === 4) {
-    if (process.argv[2] !== "--grep" && process.argv[2] !== "-g") {
-      usage();
+  console.log(JSON.stringify(report));
+}
+
+// Verify that every snapshot scenario is present in the benchmark output at
+// `benchmarkResultPath` with the same failing request indexes. Mismatches are
+// logged to stderr. Resolves to `true` only if all scenarios match.
+async function verify(benchmarkResultPath) {
+  let success = true;
+  // Resolve against the cwd, which is where `fs.writeFileSync` put the file
+  const benchmarkResult = require(path.resolve(benchmarkResultPath));
+  const snapshotResult = require(path.join(
+    getScenariosDir(),
+    SCENARIO_SNAPSHOT_NAME
+  ));
+  // `Set.prototype.difference` needs Node 22, but the CI workflow uses Node 20
+  const diff = (a, b) => [...a].filter((x) => !b.has(x)).sort((x, y) => x - y);
+
+  for (const scenarioName in snapshotResult) {
+    const isMissing = benchmarkResult[scenarioName] === undefined;
+    const expected = new Set(snapshotResult[scenarioName].failures);
+    const actual = new Set(benchmarkResult[scenarioName]?.failures);
+    const shouldFail = diff(expected, actual);
+    const shouldNotFail = diff(actual, expected);
+
+    if (isMissing || shouldFail.length > 0 || shouldNotFail.length > 0) {
+      success = false;
+      // We're logging to stderr so that it doesn't pollute stdout where we write the result
+      console.error(`Snapshot failure for ${scenarioName}`);
+      if (shouldFail.length > 0) {
+        console.error(
+          `Scenario ${scenarioName} should fail at indexes ${shouldFail}`
+        );
+      }
+      if (shouldNotFail.length > 0) {
+        console.error(
+          `Scenario ${scenarioName} should not fail at indexes ${shouldNotFail}`
+        );
+      }
     }
+  }
 
-    grep = process.argv[3];
+  if (success) {
+    console.error("Benchmark result matches snapshot");
   }
 
+  return success;
+}
+
+async function benchmarkAllScenarios(outPath, grep) {
   const result = {};
   const scenariosDir = path.join(__dirname, SCENARIOS_DIR);
 
@@ -41,11 +122,11 @@ async function main() {
   let totalTime = 0;
   let totalFailures = 0;
   for (let scenarioFile of scenarioFiles) {
-    if (grep && !scenarioFile.includes(grep)) {
+    if (grep !== undefined && !scenarioFile.includes(grep)) {
       continue;
     }
     // Get the filename from the path
-    const scenarioResult = await runScenario(
+    const scenarioResult = await benchmarkScenario(
       path.join(scenariosDir, scenarioFile)
     );
     totalTime += scenarioResult.result.timeMs;
@@ -53,7 +134,7 @@ async function main() {
     result[scenarioResult.name] = scenarioResult.result;
   }
 
-  console.log(JSON.stringify(result));
+  fs.writeFileSync(outPath, JSON.stringify(result) + "\n");
 
   // Log info to stderr so that it doesn't pollute stdout where we write the result
   console.error(
@@ -62,10 +143,10 @@ async function main() {
     } seconds with ${totalFailures} failures.`
   );
 
-  process.exit(0);
+  console.error(`Benchmark results written to ${outPath}`);
 }
 
-async function runScenario(scenarioPath) {
+async function benchmarkScenario(scenarioPath) {
   const { config, requests } = await loadScenario(scenarioPath);
   const name = path.basename(scenarioPath).split(".")[0];
   console.error(`Running ${name} scenario`);
@@ -221,6 +302,10 @@ function readFile(path) {
   });
 }
 
+function getScenariosDir() {
+  return path.join(__dirname, SCENARIOS_DIR);
+}
+
 main().catch((error) => {
   console.error(error);
   process.exit(1);
diff --git a/crates/tools/js/benchmark/package.json b/crates/tools/js/benchmark/package.json
index c838a89b1d0..21bed492cea 100644
--- a/crates/tools/js/benchmark/package.json
+++ b/crates/tools/js/benchmark/package.json
@@ -5,13 +5,17 @@
   "description": "",
   "main": "index.js",
   "scripts": {
-    "benchmark": "node index.js",
-    "prebenchmark": "cd ../../../edr_napi/ && pnpm build && cd ../../packages/hardhat-core/ && pnpm build"
+    "benchmark": "node index.js benchmark",
+    "prebenchmark": "cd ../../../edr_napi/ && pnpm build && cd ../../packages/hardhat-core/ && pnpm build",
+    "verify": "node index.js verify",
+    "report": "node index.js report",
+    "help": "node index.js -h"
   },
   "keywords": [],
   "author": "",
   "license": "ISC",
   "dependencies": {
+    "argparse": "^2.0.1",
     "hardhat": "workspace:^",
     "lodash": "^4.17.11",
     "tsx": "^4.7.1"
diff --git a/crates/tools/scenarios/snapshot.json b/crates/tools/scenarios/snapshot.json
new file mode 100644
index 00000000000..01922c1da31
--- /dev/null
+++ b/crates/tools/scenarios/snapshot.json
@@ -0,0 +1 @@
+{"seaport_4f4e7c20":{"timeMs":12390.151999950409,"failures":[1144,1217,1290,1362,1435,1506,1579,1652,1725,1798,1871,1944,2036,2118,2209,2282,2355,2428,2492,2573,2637,2710,3174,3736,4657,5377,10275,13575,16175,16219,16238,16346,16713,16727,16739,16751,16809,16893,16905,16917,16929,16948,16955,16962,16980,16987,16992,17008,17024,17031,17250,17257,17321,17365,17374,17448,17457,17473,17480,17627,17667,17685,17727,17756,17798,17816,17869,17887,17990,18049,18160,18187,18271,18298,18552,18855,19087,19749,19758,19767,19844,19921,20030,20105,20147,20209,20251,20293,20337,20379,20421,20463,20513,20570,20578,20671,20738,20807,20856,20863,20903,20910,20917,20965,20972,20979,21044,21109,21157,21212,21267,21332,21397,21462,21527,21585,21643,21701,21759,21814,21862,21902,21950,21990,22030,22070,22110,22150,22190,22290,22344,22351,22358,22451,22458,22545,22622,22699,22706,22783,22861,22915,23014,23068,23143,23183,23223,23263,23303,23343,23383,23431,23479,23519,23594,23601,23668,23675,23724,23733,23816,23823,23860,23999,24068,24115,24196,24225,24342,24400,24468,24533,24774,24891,24976,25020,25092,25114,25136,25158,25198,25238,25260,25304,25336,25378,25418,25458,25470,25482,25533,25582,25631,25682,25731,25780,25839,25898,25949,26002,26053,26077,26101,26125,26149,26302,26410,26454,26498,26571,26904,26913,26980,27402,27459,27478,27670,28049,28371,28390,28573,28592,28626,28645,28679,28698,28741,28760,28803,28822,28871,28890,28942,28961,29019,29038,29096,29115,29167,29186,29238,29257,29343,29362,29402,29421,29507,29526,29612,29631,29683,29702,31063,32254,32273,33057,33076,33110,33129,33163,33182,33225,33244,33332,33384,33403,33457,33476,33526,33545,33618,33637,33689,33708,33760,33779,33865,33884,33924,33943,34029,34048,34134,34153,34205,34874,34912,34957,34964,34973,34982,35034,35098,35132,35194,35261,35286,35320,35327,35387,35449,35483,35492,35501,35510]}}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b899609f03e..6e87b3f5f62 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -59,6 +59,9 @@ importers:
 
   crates/tools/js/benchmark:
     dependencies:
+      argparse:
+        specifier: ^2.0.1
+        version: 2.0.1
       hardhat:
         specifier: workspace:^
         version: link:../../../../packages/hardhat-core