Merge pull request #28 from TranslatorSRI/benchmark_runner
Benchmark runner
maximusunc authored Nov 15, 2023
2 parents a2cc292 + 7577b1a commit 27a3834
Showing 18 changed files with 197 additions and 42 deletions.
55 changes: 55 additions & 0 deletions .github/workflows/pypi.yml
@@ -0,0 +1,55 @@
name: pypi

on:
push:
tags:
- v*

jobs:
build:
name: Build Python package
runs-on: ubuntu-latest
permissions:
id-token: write
steps:
- name: Check out the repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.release.target_commitish }}
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install build
run: >-
python -m
pip install
build
--user
- name: Build a binary wheel and a source tarball
run: >-
python -m
build
- name: Store the distribution packages
uses: actions/upload-artifact@v3
with:
name: python-package-distributions
path: dist/
publish-to-pypi:
name: Publish Python package to PyPI
needs:
- build
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/benchmarks-runner
permissions:
id-token: write
steps:
- name: Download all the dists
uses: actions/download-artifact@v3
with:
name: python-package-distributions
path: dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
24 changes: 24 additions & 0 deletions README.md
@@ -63,3 +63,27 @@ _Requires python 3.9._
- Install dependencies: `pip install -r requirements.txt`
- Start the frontend server: `python server.py`
- Open in your browser

## Benchmark Runner
The benchmarks can be installed from PyPI and used as part of Translator-wide automated testing.
- `pip install benchmarks-runner`
To run benchmarks:
```python
import asyncio
from benchmarks_runner import run_benchmarks

output = asyncio.run(run_benchmarks(<benchmark>, <target>))
```
where `<benchmark>` is the name of a benchmark specified in `config/benchmarks.json` and `<target>` is the name of a target specified in `config/targets.json`.

### Sample Output
```
Benchmark: GTRx
Results Directory: /tmp/tmpaf10m9_q/GTRx/bte/2023-11-10_13-03-11
k=1 k=5 k=10 k=20
Precision @ k 0.0000 0.0500 0.0250 0.0125
Recall @ k 0.0000 0.2500 0.2500 0.2500
mAP @ k 0.0000 0.0833 0.0833 0.0833
Top-k Accuracy 0.0000 0.2500 0.2500 0.2500
Mean Reciprocal Rank 0.08333333333333333
```
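
When the metrics and plots are needed programmatically rather than printed, the return value of `run_benchmarks` can be unpacked directly: it yields the evaluation results plus the generated plots as base64-encoded PNGs (see `benchmarks_runner/main.py` and `benchmarks_runner/cli/eval.py` below). A minimal sketch, assuming the `GTRx` benchmark and `bte` target from the sample output above; the exact keys of the metrics dictionary depend on the benchmark:
```python
import asyncio
import base64
from pathlib import Path

from benchmarks_runner import run_benchmarks

# Run one benchmark against one target (names taken from the sample above).
metrics, images = asyncio.run(run_benchmarks("GTRx", "bte"))

# `images` maps each evaluated target/ARA to its plots, keyed by metric name
# ("precision", "recall", "mAP", "top_k_accuracy") and base64-encoded as PNG.
out_dir = Path("plots")
out_dir.mkdir(exist_ok=True)
for target_name, plots in images.items():
    for metric_name, b64_png in plots.items():
        (out_dir / f"{target_name}_{metric_name}.png").write_bytes(base64.b64decode(b64_png))

print(metrics)
```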
Empty file removed benchmarks/utils/__init__.py
1 change: 1 addition & 0 deletions benchmarks_runner/__init__.py
@@ -0,0 +1 @@
from .main import run_benchmarks
File renamed without changes.
48 changes: 37 additions & 11 deletions benchmarks/cli/eval.py → benchmarks_runner/cli/eval.py
@@ -1,10 +1,12 @@
from argparse import ArgumentParser
import base64
import io
from matplotlib import pyplot as plt
from pathlib import Path
import os


from benchmarks.eval import evaluate_results
from benchmarks_runner.eval import evaluate_results

metrics_at_k = {
'Precision @ k\t\t': 'precision_at_k',
@@ -17,40 +19,63 @@
'Mean Reciprocal Rank\t': 'mean_reciprocal_rank'
}

def evaluate_ara_results(results_dir, args):
def evaluate_ara_results(
benchmark,
results_dir,
k: int = 20,
save_plots: bool = False,
save_json: bool = False,
):
results = evaluate_results(
args.benchmark,
benchmark,
results_dir,
k=args.k
k=k,
)
imgs = {}

if args.plots:
if save_plots:
plots_dir = Path(results_dir)
assert plots_dir.exists(), f"{plots_dir} does not exist."

results.plot_precision()
plt.gcf().savefig(plots_dir / 'precision.png')
with io.BytesIO() as buffer:
plt.gcf().savefig(buffer, format="png")
buffer.seek(0)
imgs["precision"] = base64.b64encode(buffer.read()).decode()

results.plot_recall()
plt.gcf().savefig(plots_dir / 'recall.png')
with io.BytesIO() as buffer:
plt.gcf().savefig(buffer, format="png")
buffer.seek(0)
imgs["recall"] = base64.b64encode(buffer.read()).decode()

results.plot_mAP()
plt.gcf().savefig(plots_dir / 'mAP.png')
with io.BytesIO() as buffer:
plt.gcf().savefig(buffer, format="png")
buffer.seek(0)
imgs["mAP"] = base64.b64encode(buffer.read()).decode()

results.plot_top_k_accuracy()
plt.gcf().savefig(plots_dir / 'top_k_accuracy.png')
with io.BytesIO() as buffer:
plt.gcf().savefig(buffer, format="png")
buffer.seek(0)
imgs["top_k_accuracy"] = base64.b64encode(buffer.read()).decode()

ks = [1, 5, 10, 20, 50, 100, 200, 500]
while ks[-1] >= args.k:
while ks[-1] >= k:
ks.pop()
ks.append(args.k)
ks.append(k)

if args.json:
if save_json:
results.to_json(results_dir)

output = [
'',
"Benchmark: {}".format(args.benchmark),
"Benchmark: {}".format(benchmark),
"Results Directory: {}\n".format(results_dir),
"\t\t\t{}".format('\t'.join(['k={}'.format(k) for k in ks]))
]
@@ -69,6 +94,7 @@ def evaluate_ara_results(results_dir, args):
output.append('')

print("\n".join(output))
return results.output_dict, imgs

def main():
parser = ArgumentParser(description="Run a benchmark on a set of results.")
@@ -109,6 +135,6 @@ def main():
# evaluate results for each ARA
for ara in [ara for ara in os.listdir(args.results_dir) if os.path.isdir(os.path.join(args.results_dir, ara))]:
results_dir = os.path.join(args.results_dir, ara)
evaluate_ara_results(results_dir, args)
evaluate_ara_results(args.benchmark, results_dir, args.k, args.plots, args.json)
else:
evaluate_ara_results(args.results_dir, args)
evaluate_ara_results(args.benchmark, args.results_dir, args.k, args.plots, args.json)
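
Rather than writing PNG files into the results directory, the refactored `evaluate_ara_results` renders each figure into an in-memory buffer and returns it base64-encoded, so callers such as `run_benchmarks` can pass the plots around as plain strings. A standalone sketch of that pattern (illustration only, not code from this PR):
```python
import base64
import io

from matplotlib import pyplot as plt

# Any figure works; the values here are just example data.
plt.plot([1, 5, 10, 20], [0.0, 0.05, 0.025, 0.0125])

with io.BytesIO() as buffer:
    plt.gcf().savefig(buffer, format="png")  # render the current figure as PNG bytes
    buffer.seek(0)
    encoded = base64.b64encode(buffer.read()).decode()  # JSON-safe base64 string

# `encoded` can now be embedded in JSON output or a data URI / HTML <img> tag.
print(encoded[:32], "...")
```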
9 changes: 5 additions & 4 deletions benchmarks/cli/fetch.py → benchmarks_runner/cli/fetch.py
@@ -1,6 +1,7 @@
from argparse import ArgumentParser
import asyncio

from benchmarks.request import fetch_results
from benchmarks_runner.request import fetch_results


def main():
@@ -38,13 +39,13 @@ def main():
)
args = parser.parse_args()

fetch_results(
output_dir = asyncio.run(fetch_results(
args.benchmark,
args.target,
args.results_dir,
overwrite=args.overwrite,
scored=not args.unscored,
num_concurrent_requests=args.n
)

))

print(f"Results saved to: {output_dir}")
9 changes: 4 additions & 5 deletions benchmarks/cli/score.py → benchmarks_runner/cli/score.py
@@ -1,6 +1,7 @@
from argparse import ArgumentParser
import asyncio

from benchmarks.request import score_results
from benchmarks_runner.request import score_results


def main():
@@ -28,11 +29,9 @@
)
args = parser.parse_args()

score_results(
asyncio.run(score_results(
args.unscored_results_dir,
args.target,
args.scored_results_dir,
num_concurrent_requests=args.n
)


))
2 changes: 1 addition & 1 deletion benchmarks/eval.py → benchmarks_runner/eval.py
@@ -11,7 +11,7 @@
from matplotlib import pyplot as plt
from matplotlib.axes import Axes

from benchmarks.utils.benchmark import benchmark_ground_truth
from benchmarks_runner.utils.benchmark import benchmark_ground_truth


def evaluate_results(
36 changes: 36 additions & 0 deletions benchmarks_runner/main.py
@@ -0,0 +1,36 @@
"""Main Benchmarks Test Runner entry."""
import asyncio
import os
import tempfile

from benchmarks_runner.request import fetch_results
from benchmarks_runner.cli.eval import evaluate_ara_results


async def run_benchmarks(
benchmark: str,
target: str,
):
"""Run benchmark tests."""
with tempfile.TemporaryDirectory() as tmpdir:
output_dir = await fetch_results(benchmark, target, tmpdir)
output_dict = {}
output_imgs = {}
if target == 'ars':
# evaluate results for each ARA
for ara in [ara for ara in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, ara))]:
results_dir = os.path.join(output_dir, ara)
ara_output_dict, ara_imgs = evaluate_ara_results(benchmark, results_dir, save_plots=True)
output_dict[ara] = ara_output_dict
output_imgs[ara] = ara_imgs
else:
output_dict, imgs = evaluate_ara_results(benchmark, output_dir, save_plots=True)
output_imgs[target] = imgs

return output_dict, output_imgs


if __name__ == "__main__":
asyncio.run(run_benchmarks(
"ameliorates",
"aragorn",
))
32 changes: 17 additions & 15 deletions benchmarks/request.py → benchmarks_runner/request.py
@@ -18,7 +18,7 @@
# double the ARS timeout, just in case. The ARS should set all queries to error after 5 mins
MAX_QUERY_TIME = os.getenv("MAX_QUERY_TIME", 600)

def fetch_results(
async def fetch_results(
benchmark: str,
target: str,
results_dir: str,
@@ -82,7 +82,7 @@ def fetch_results(
Path(output_dir).mkdir(parents=True, exist_ok=True)

if target == "ars":
send_requests_to_ars(
await send_requests_to_ars(
uids,
messages,
url,
@@ -93,17 +93,19 @@

else:

send_requests_store_results(
await send_requests_store_results(
uids,
messages,
url,
output_dir,
num_concurrent_requests,
progress,
)

return output_dir


def send_requests_to_ars(
async def send_requests_to_ars(
uids: Sequence[str],
messages: Sequence[dict],
url: str,
Expand All @@ -116,7 +118,7 @@ def send_requests_to_ars(
send_request_to_ars(uid, msg, url, output_dir, pbar)
for uid, msg in zip(uids, messages)
]
asyncio.run(gather(*coroutines, limit=num_concurrent_requests))
await gather(*coroutines, limit=num_concurrent_requests)

if pbar is not None:
pbar.close()
@@ -190,18 +192,18 @@ async def send_request_to_ars(
response = await send_request(uid, f"{url}/messages/{parent_pk}", msg, request_type="get")
merged_pk = response.get("fields", {}).get("merged_version")
if merged_pk is None:
raise Exception("Failed to get the ARS merged message.")

merged_message = await send_request(uid, f"{url}/messages/{merged_pk}", msg, request_type="get")
Path(os.path.join(output_dir, "ars")).mkdir(parents=True, exist_ok=True)
with open(os.path.join(output_dir, "ars", f"{uid}.json"), "w") as file:
json.dump(merged_message, file)
print(f"Failed to get the ARS merged message from pk: {parent_pk}.")
else:
merged_message = await send_request(uid, f"{url}/messages/{merged_pk}", msg, request_type="get")
Path(os.path.join(output_dir, "ars")).mkdir(parents=True, exist_ok=True)
with open(os.path.join(output_dir, "ars", f"{uid}.json"), "w") as file:
json.dump(merged_message, file)

if pbar:
pbar.update()


def score_results(
async def score_results(
unscored_results_dir: str,
target: str,
scored_results_dir: str,
@@ -241,7 +243,7 @@ def score_results(
message['workflow'] = workflow
messages.append(message)

send_requests_store_results(
await send_requests_store_results(
uids,
messages,
url,
@@ -250,7 +252,7 @@ def send_requests_store_results(
progress
)

def send_requests_store_results(
async def send_requests_store_results(
uids: Sequence[str],
messages: Sequence[dict],
url: str,
@@ -263,7 +265,7 @@
send_request_store_result(uid, msg, url, output_dir, pbar)
for uid, msg in zip(uids, messages)
]
asyncio.run(gather(*coroutines, limit=num_concurrent_requests))
await gather(*coroutines, limit=num_concurrent_requests)

if pbar is not None:
pbar.close()
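
The `gather(*coroutines, limit=...)` call awaited above is a project helper, not `asyncio.gather` (which has no `limit` parameter), and its implementation is not shown in this diff. For illustration only, a semaphore-based sketch of how such a bounded gather is commonly written:
```python
import asyncio
from typing import Any, Awaitable

async def gather_limited(*aws: Awaitable[Any], limit: int) -> list:
    """Run awaitables concurrently, but no more than `limit` at a time."""
    semaphore = asyncio.Semaphore(limit)

    async def run_one(aw: Awaitable[Any]) -> Any:
        async with semaphore:  # blocks while `limit` awaitables are already running
            return await aw

    return await asyncio.gather(*(run_one(aw) for aw in aws))
```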
File renamed without changes.
File renamed without changes.
@@ -5,7 +5,7 @@
from copy import deepcopy
from typing import Dict, List, Sequence, Tuple

from benchmarks.utils.constants import BENCHMARKS, CONFIG_DIR
from benchmarks_runner.utils.constants import BENCHMARKS, CONFIG_DIR

from .normalize import get_normalizer

File renamed without changes.
File renamed without changes.