Updated requirements, extract_ir perf improvements, benchmark reporting
* Use precise pip package versions in requirements.txt, to avoid unwanted upgrades.
* Fix in extract_ir (authored by [email protected]), speeding up extraction by a few orders of magnitude.
* Tools to post-process JSON benchmark reports when benchmarks collect perf counters - this helps validate hypotheses about improvements/regressions (benchmarks: http://github.com/google/benchmark).
Showing 5 changed files with 392 additions and 12 deletions.
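The report post-processing added here consumes the JSON output of google/benchmark. As a rough illustration only (the benchmark names, counter names, and numbers below are made up), each entry under the 'benchmarks' key carries the benchmark name, the iteration count, and one field per collected perf counter; the loader in benchmark_report.py below relies on exactly those fields:

# Illustrative shape of a google/benchmark JSON report with perf counters.
# Names and values are hypothetical; only 'benchmarks', and per entry 'name',
# 'iterations' and the requested counter fields, are read by the tools below.
example_report = {
    'benchmarks': [
        {
            'name': 'BM_A',
            'iterations': 10,
            'INSTRUCTIONS': 12345.0,
            'CYCLES': 6789.0,
        },
        {
            'name': 'BM_A',
            'iterations': 11,
            'INSTRUCTIONS': 12001.0,
            'CYCLES': 6540.0,
        },
    ]
}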
@@ -0,0 +1,187 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analysis for benchmark results.json."""

import collections
import math
import statistics

from typing import Any
from typing import Dict
from typing import Iterable
from typing import List
from typing import Tuple

# For each benchmark, and for each counter, capture the recorded values.
PerBenchmarkResults = Dict[str, Dict[str, List[float]]]

# Benchmark data, as captured by the benchmark json output: a dictionary from
# benchmark names to a list of run results. Each run result is a dictionary of
# key-value pairs, e.g. counter name - value.
BenchmarkRunResults = Dict[str, List[Dict[str, Any]]]

# A comparison per benchmark, per counter, capturing the geomean and the stdev
# of the base and experiment values.
ABComparison = Dict[str, Dict[str, Tuple[float, float, float]]]


def _geomean(data: List[float]):
  return math.exp(sum([math.log(x) for x in data]) / len(data))


def _stdev(data: List[float]):
  assert data
  return 0.0 if len(data) == 1 else statistics.stdev(data)


class BenchmarkReport:
  """The counter values collected for benchmarks in a benchmark suite."""

  def __init__(self, suite_name: str, json_data: BenchmarkRunResults,
               counter_names: Iterable[str]):
    self._suite_name = suite_name
    self._load_values(json_data, counter_names)

  def suite_name(self):
    return self._suite_name

  def values(self):
    return self._values

  def names(self):
    return self._names

  def counters(self):
    return self._counters

  def raw_measurements(self):
    return self._raw_measurements

  def counter_means(self, benchmark: str, counter: str) -> Tuple[float, float]:
    if counter not in self.counters():
      raise ValueError('unknown counter')
    if benchmark not in self.names():
      raise ValueError('unknown benchmark')
    return (_geomean(self._values[benchmark][counter]),
            _stdev(self._values[benchmark][counter]))

  def zero_counters(self):
    ret = set()
    for name in self.names():
      for counter in self.values()[name]:
        if 0.0 in self.values()[name][counter]:
          ret.add((name, counter))
    return frozenset(ret)

  def large_variation_counters(self, variation: float):
    ret = set()
    for name in self.names():
      for counter in self.values()[name]:
        vals = self.values()[name][counter]
        swing = _stdev(vals) / _geomean(vals)
        if swing > variation:
          ret.add((name, counter, swing))
    return frozenset(ret)

  def _load_values(self, data: BenchmarkRunResults,
                   names: Iterable[str]) -> PerBenchmarkResults:
    """Organize json values per-benchmark, per counter.

    Args:
      data: json data
      names: perf counter names

    Returns:
      benchmark data organized per-benchmark, per-counter name.
    """
    runs = data['benchmarks']
    self._values = collections.defaultdict(
        lambda: collections.defaultdict(list))
    self._raw_measurements = collections.defaultdict(
        lambda: collections.defaultdict(list))
    self._counters = set()
    self._names = set()

    for r in runs:
      benchmark_name = r['name']
      for counter in names:
        value = float(r[counter])
        iters = float(r['iterations'])
        self._raw_measurements[benchmark_name][counter].append(value * iters)
        self._values[benchmark_name][counter].append(value)
        self._counters.add(counter)
      self._names.add(benchmark_name)
    self._counters = frozenset(self._counters)
    self._names = frozenset(self._names)
    return self._values


class BenchmarkComparison:
  """Analysis of 2 benchmark runs."""

  def __init__(self, base_report: BenchmarkReport,
               exp_report: BenchmarkReport):
    if base_report.suite_name() != exp_report.suite_name():
      raise ValueError('cannot compare different suites')
    if set(base_report.names()) != set(exp_report.names()):
      raise ValueError('suite runs have different benchmark names')
    if set(base_report.counters()) != set(exp_report.counters()):
      raise ValueError(
          'counter names are different between base and experiment')

    self._base = base_report
    self._exp = exp_report

  def suite_name(self):
    return self._base.suite_name()

  def summarize(self) -> ABComparison:
    """Summarize the results from two runs (base/experiment).

    Returns:
      A per benchmark, per counter summary of the improvement/regression
      between the 2 runs, in percents.
    """
    base_results = self._base.values()
    exp_results = self._exp.values()

    ret = {}
    for bname in base_results:
      ret[bname] = {}
      for counter in base_results[bname]:
        base_vals = base_results[bname][counter]
        exp_vals = exp_results[bname][counter]
        base_geomean = _geomean(base_vals)
        exp_geomean = _geomean(exp_vals)
        improvement = 1 - exp_geomean / base_geomean
        base_stdev = _stdev(base_vals)
        exp_stdev = _stdev(exp_vals)
        ret[bname][counter] = (improvement, base_stdev / base_geomean,
                               exp_stdev / exp_geomean)
    return ret

  def names(self):
    return self._base.names()

  def counters(self):
    return self._base.counters()

  def total_improvement(self, counter: str):
    assert counter in self.counters()
    logsum = 0
    # We look at the geomean of the improvement for each benchmark.
    for bname in self.names():
      b_geomean, _ = self._base.counter_means(bname, counter)
      e_geomean, _ = self._exp.counter_means(bname, counter)
      logsum += math.log(e_geomean / b_geomean)
    return 1.0 - math.exp(logsum / len(self.names()))
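For reference, a minimal usage sketch of the classes above; the report paths, suite name, and the CYCLES counter are assumptions, and the tuple unpacking follows summarize(): relative improvement, base stdev/geomean, experiment stdev/geomean.

import json

from compiler_opt.tools import benchmark_report

# Hypothetical report paths, suite name and counter name, for illustration.
with open('/tmp/base_report.json') as b, open('/tmp/exp_report.json') as e:
  base = benchmark_report.BenchmarkReport('my_suite', json.load(b), ['CYCLES'])
  exp = benchmark_report.BenchmarkReport('my_suite', json.load(e), ['CYCLES'])

comparison = benchmark_report.BenchmarkComparison(base, exp)
for name, per_counter in comparison.summarize().items():
  for counter, (improvement, base_noise, exp_noise) in per_counter.items():
    # improvement = 1 - geomean(exp) / geomean(base): positive means the
    # experiment run recorded fewer counter events than the base run.
    print(f'{name} {counter}: {improvement:+.2%} '
          f'(noise: base {base_noise:.2%}, exp {exp_noise:.2%})')

# Geomean-based aggregate improvement for one counter across all benchmarks.
print(comparison.total_improvement('CYCLES'))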
@@ -0,0 +1,79 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Convert benchmark results.json to csv.

To run:

python3 compiler_opt/tools/benchmark_report_counter.py \
    --base=/tmp/base_report.json \
    --exp=/tmp/exp_report.json \
    --counters=INSTRUCTIONS \
    --counters=CYCLES \
    --output=/tmp/summary.csv

Optionally, add --suite_name=<name of benchmark> if batch-processing multiple
benchmarks' reports.

This assumes /tmp/{base|exp}_report.json were produced from benchmark runs
that were asked to collect the counters named INSTRUCTIONS and CYCLES.
"""

import csv
import json

from typing import Sequence

from absl import app
from absl import flags

import tensorflow.compat.v2 as tf

from compiler_opt.tools import benchmark_report

flags.DEFINE_string('suite_name', 'benchmark_suite',
                    'The name of the benchmark suite (for reporting).')
flags.DEFINE_string('base', None,
                    'JSON report produced by the base benchmark run.')
flags.DEFINE_string('exp', None,
                    'JSON report produced by the experiment benchmark run.')
flags.DEFINE_string('output', 'reports.csv', 'CSV output')
flags.DEFINE_multi_string(
    'counters', None,
    'Counter names. Should match exactly the names used when running the '
    'benchmark.')

FLAGS = flags.FLAGS


def main(argv: Sequence[str]) -> None:
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  with tf.io.gfile.GFile(FLAGS.base, 'r') as b:
    with tf.io.gfile.GFile(FLAGS.exp, 'r') as e:
      base_report = benchmark_report.BenchmarkReport(
          FLAGS.suite_name, json.load(b), FLAGS.counters)
      exp_report = benchmark_report.BenchmarkReport(
          FLAGS.suite_name, json.load(e), FLAGS.counters)
  comparison = benchmark_report.BenchmarkComparison(base_report, exp_report)
  summary = comparison.summarize()
  with tf.io.gfile.GFile(FLAGS.output, 'w+') as o:
    co = csv.writer(o)
    for bm in summary:
      for c in summary[bm]:
        co.writerow([bm, c] + list(summary[bm][c]))


if __name__ == '__main__':
  app.run(main)
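The CSV written above has no header row; each row is the benchmark name, the counter name, and the three values from summarize(). A minimal sketch of reading it back, assuming the /tmp/summary.csv path from the docstring example:

import csv

# Each row written above is: benchmark name, counter name, improvement,
# base stdev/geomean, experiment stdev/geomean.
with open('/tmp/summary.csv') as f:
  for benchmark, counter, improvement, base_noise, exp_noise in csv.reader(f):
    print(benchmark, counter, float(improvement), float(base_noise),
          float(exp_noise))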
@@ -0,0 +1,117 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for compiler_opt.tools.benchmark_report_converter."""

from absl.testing import absltest

from compiler_opt.tools import benchmark_report


base_data = {
    'benchmarks': [
        {
            'PerfCounter_0': 10,
            'PerfCounter_1': 20,
            'iterations': 10,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 11,
            'PerfCounter_1': 19,
            'iterations': 11,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 60,
            'PerfCounter_1': 50,
            'iterations': 15,
            'name': 'BM_B',
        },
    ]
}

exp_data = {
    'benchmarks': [
        {
            'PerfCounter_0': 9,
            'PerfCounter_1': 11,
            'iterations': 11,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 8,
            'PerfCounter_1': 10,
            'iterations': 8,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 62,
            'PerfCounter_1': 54,
            'iterations': 14,
            'name': 'BM_B',
        },
    ]
}


class BenchmarkReportConverterTest(absltest.TestCase):

  def test_loading(self):
    report = benchmark_report.BenchmarkReport(
        'foo', base_data, ['PerfCounter_0', 'PerfCounter_1'])
    self.assertEqual(
        report.values(), {
            'BM_A': {
                'PerfCounter_0': [10, 11],
                'PerfCounter_1': [20, 19]
            },
            'BM_B': {
                'PerfCounter_0': [60],
                'PerfCounter_1': [50],
            }
        })
    self.assertSetEqual(report.names(), set(['BM_A', 'BM_B']))
    self.assertSetEqual(report.counters(),
                        set(['PerfCounter_0', 'PerfCounter_1']))
    self.assertEqual(
        report.counter_means('BM_A', 'PerfCounter_0'),
        (10.488088481701517, 0.7071067811865476))

  def test_summarize_results(self):
    b_values = benchmark_report.BenchmarkReport(
        'foo', base_data, ['PerfCounter_0', 'PerfCounter_1'])
    e_values = benchmark_report.BenchmarkReport(
        'foo', exp_data, ['PerfCounter_0', 'PerfCounter_1'])
    summary = benchmark_report.BenchmarkComparison(b_values, e_values)
    self.assertDictEqual(
        summary.summarize(), {
            'BM_A': {
                'PerfCounter_0': (0.19096016504410973, 0.0674199862463242,
                                  0.08333333333333334),
                'PerfCounter_1': (0.4619724131510293, 0.0362738125055006,
                                  0.0674199862463242)
            },
            'BM_B': {
                'PerfCounter_0': (-0.03333333333333366, 0.0, 0.0),
                'PerfCounter_1': (-0.0800000000000003, 0.0, 0.0)
            }
        })
    self.assertEqual(
        summary.total_improvement('PerfCounter_0'), 0.08566536243319522)


if __name__ == '__main__':
  absltest.main()
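The expected values in these tests follow directly from the formulas in benchmark_report.py above; for example, the BM_A / PerfCounter_0 numbers can be reproduced by hand:

import math
import statistics

base_vals = [10, 11]  # BM_A, PerfCounter_0 in base_data
exp_vals = [9, 8]     # BM_A, PerfCounter_0 in exp_data

base_geomean = math.sqrt(10 * 11)         # 10.488088481701517
base_stdev = statistics.stdev(base_vals)  # 0.7071067811865476

# summarize() tuple for BM_A / PerfCounter_0:
improvement = 1 - math.sqrt(9 * 8) / base_geomean            # 0.19096016504410973
base_noise = base_stdev / base_geomean                       # 0.0674199862463242
exp_noise = statistics.stdev(exp_vals) / math.sqrt(9 * 8)    # 0.08333333333333334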