diff --git a/compiler_opt/tools/benchmark_report.py b/compiler_opt/tools/benchmark_report.py
new file mode 100644
index 00000000..ef04da53
--- /dev/null
+++ b/compiler_opt/tools/benchmark_report.py
@@ -0,0 +1,187 @@
+# coding=utf-8
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Analysis for benchmark results.json."""
+
+import collections
+import math
+import statistics
+
+from typing import Any
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Tuple
+
+# For each benchmark, and for each counter, capture the recorded values.
+PerBenchmarkResults = Dict[str, Dict[str, List[float]]]
+
+# Benchmark data, as captured by the benchmark json output: a dictionary from
+# benchmark names to a list of run results. Each run result is a dictionary of
+# key-value pairs, e.g. counter name - value.
+BenchmarkRunResults = Dict[str, List[Dict[str, Any]]]
+
+# A comparison per benchmark, per counter, capturing the geomean and the stdev
+# of the base and experiment values.
+ABComparison = Dict[str, Dict[str, Tuple[float, float, float]]]
+
+
+def _geomean(data: List[float]):
+  return math.exp(sum([math.log(x) for x in data]) / len(data))
+
+
+def _stdev(data: List[float]):
+  assert data
+  return 0.0 if len(data) == 1 else statistics.stdev(data)
+
+
+class BenchmarkReport:
+  """The counter values collected for benchmarks in a benchmark suite."""
+
+  def __init__(self, suite_name: str, json_data: BenchmarkRunResults,
+               counter_names: Iterable[str]):
+    self._suite_name = suite_name
+    self._load_values(json_data, counter_names)
+
+  def suite_name(self):
+    return self._suite_name
+
+  def values(self):
+    return self._values
+
+  def names(self):
+    return self._names
+
+  def counters(self):
+    return self._counters
+
+  def raw_measurements(self):
+    return self._raw_measurements
+
+  def counter_means(self, benchmark: str, counter: str) -> Tuple[float, float]:
+    if counter not in self.counters():
+      raise ValueError('unknown counter')
+    if benchmark not in self.names():
+      raise ValueError('unknown benchmark')
+    return (_geomean(self._values[benchmark][counter]),
+            _stdev(self._values[benchmark][counter]))
+
+  def zero_counters(self):
+    ret = set()
+    for name in self.names():
+      for counter in self.values()[name]:
+        if 0.0 in self.values()[name][counter]:
+          ret.add((name, counter))
+    return frozenset(ret)
+
+  def large_variation_counters(self, variation: float):
+    ret = set()
+    for name in self.names():
+      for counter in self.values()[name]:
+        vals = self.values()[name][counter]
+        swing = _stdev(vals) / _geomean(vals)
+        if swing > variation:
+          ret.add((name, counter, swing))
+    return frozenset(ret)
+
+  def _load_values(self, data: BenchmarkRunResults,
+                   names: Iterable[str]) -> PerBenchmarkResults:
+    """Organize json values per-benchmark, per counter.
+
+    Args:
+      data: json data
+      names: perf counter names
+    Returns:
+      benchmark data organized per-benchmark, per-counter name.
+    """
+    runs = data['benchmarks']
+    self._values = collections.defaultdict(
+        lambda: collections.defaultdict(list))
+    self._raw_measurements = collections.defaultdict(
+        lambda: collections.defaultdict(list))
+    self._counters = set()
+    self._names = set()
+
+    for r in runs:
+      benchmark_name = r['name']
+      for counter in names:
+        value = float(r[counter])
+        iters = float(r['iterations'])
+        self._raw_measurements[benchmark_name][counter].append(value * iters)
+        self._values[benchmark_name][counter].append(value)
+        self._counters.add(counter)
+      self._names.add(benchmark_name)
+    self._counters = frozenset(self._counters)
+    self._names = frozenset(self._names)
+
+
+class BenchmarkComparison:
+  """Analysis of 2 benchmark runs."""
+
+  def __init__(self, base_report: BenchmarkReport, exp_report: BenchmarkReport):
+    if base_report.suite_name() != exp_report.suite_name():
+      raise ValueError('cannot compare different suites')
+    if set(base_report.names()) != set(exp_report.names()):
+      raise ValueError('suite runs have different benchmark names')
+    if set(base_report.counters()) != set(exp_report.counters()):
+      raise ValueError(
+          'counter names are different between base and experiment')
+
+    self._base = base_report
+    self._exp = exp_report
+
+  def suite_name(self):
+    return self._base.suite_name()
+
+  def summarize(self) -> ABComparison:
+    """Summarize the results from two runs (base/experiment).
+
+    Returns:
+      A per benchmark, per counter summary of the improvement/regression
+      between the 2 runs, expressed as fractions (e.g. 0.05 means 5%).
+    """
+    base_results = self._base.values()
+    exp_results = self._exp.values()
+
+    ret = {}
+    for bname in base_results:
+      ret[bname] = {}
+      for counter in base_results[bname]:
+        base_vals = base_results[bname][counter]
+        exp_vals = exp_results[bname][counter]
+        base_geomean = _geomean(base_vals)
+        exp_geomean = _geomean(exp_vals)
+        improvement = 1 - exp_geomean / base_geomean
+        base_stdev = _stdev(base_vals)
+        exp_stdev = _stdev(exp_vals)
+        ret[bname][counter] = (improvement, base_stdev / base_geomean,
+                               exp_stdev / exp_geomean)
+    return ret
+
+  def names(self):
+    return self._base.names()
+
+  def counters(self):
+    return self._base.counters()
+
+  def total_improvement(self, counter: str):
+    assert counter in self.counters()
+    logsum = 0
+    # we look at the geomean of the improvement for each benchmark
+    for bname in self.names():
+      b_geomean, _ = self._base.counter_means(bname, counter)
+      e_geomean, _ = self._exp.counter_means(bname, counter)
+      logsum += math.log(e_geomean / b_geomean)
+    return 1.0 - math.exp(logsum / len(self.names()))
diff --git a/compiler_opt/tools/benchmark_report_converter.py b/compiler_opt/tools/benchmark_report_converter.py
new file mode 100644
index 00000000..214419a9
--- /dev/null
+++ b/compiler_opt/tools/benchmark_report_converter.py
@@ -0,0 +1,79 @@
+# coding=utf-8
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+r"""Convert benchmark results.json to csv.
+
+To run:
+python3 compiler_opt/tools/benchmark_report_converter.py \
+  --base=/tmp/base_report.json \
+  --exp=/tmp/exp_report.json \
+  --counters=INSTRUCTIONS \
+  --counters=CYCLES \
+  --output=/tmp/summary.csv
+
+optionally, add --suite_name=<suite name>, if batch-processing multiple
+benchmarks' reports.
+
+Assuming /tmp/{base|exp}_report.json were produced from benchmark runs, which
+were asked to collect the counters named INSTRUCTIONS and CYCLES.
+"""
+
+import csv
+import json
+
+from typing import Sequence
+
+from absl import app
+from absl import flags
+
+import tensorflow.compat.v2 as tf
+
+from compiler_opt.tools import benchmark_report
+
+flags.DEFINE_string('suite_name', 'benchmark_suite',
+                    'The name of the benchmark suite (for reporting).')
+flags.DEFINE_string('base', None,
+                    'JSON report produced by the base benchmark run.')
+flags.DEFINE_string('exp', None,
+                    'JSON report produced by the experiment benchmark run.')
+flags.DEFINE_string('output', 'reports.csv', 'CSV output')
+flags.DEFINE_multi_string(
+    'counters', None,
+    'Counter names. Should match exactly the names used when running the '
+    'benchmark.')
+
+FLAGS = flags.FLAGS
+
+
+def main(argv: Sequence[str]) -> None:
+  if len(argv) > 1:
+    raise app.UsageError('Too many command-line arguments.')
+  with tf.io.gfile.GFile(FLAGS.base, 'r') as b:
+    with tf.io.gfile.GFile(FLAGS.exp, 'r') as e:
+      b = benchmark_report.BenchmarkReport(FLAGS.suite_name, json.load(b),
+                                           FLAGS.counters)
+      e = benchmark_report.BenchmarkReport(FLAGS.suite_name, json.load(e),
+                                           FLAGS.counters)
+      comparison = benchmark_report.BenchmarkComparison(b, e)
+      summary = comparison.summarize()
+  with tf.io.gfile.GFile(FLAGS.output, 'w+') as o:
+    co = csv.writer(o)
+    for bm in summary:
+      for c in summary[bm]:
+        co.writerow([bm, c] + list(summary[bm][c]))
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/compiler_opt/tools/benchmark_report_test.py b/compiler_opt/tools/benchmark_report_test.py
new file mode 100644
index 00000000..b4a1ac91
--- /dev/null
+++ b/compiler_opt/tools/benchmark_report_test.py
@@ -0,0 +1,117 @@
+# coding=utf-8
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for compiler_opt.tools.benchmark_report_converter."""
+
+from absl.testing import absltest
+
+from compiler_opt.tools import benchmark_report
+
+
+base_data = {
+    'benchmarks': [
+        {
+            'PerfCounter_0': 10,
+            'PerfCounter_1': 20,
+            'iterations': 10,
+            'name': 'BM_A',
+        },
+        {
+            'PerfCounter_0': 11,
+            'PerfCounter_1': 19,
+            'iterations': 11,
+            'name': 'BM_A',
+        },
+        {
+            'PerfCounter_0': 60,
+            'PerfCounter_1': 50,
+            'iterations': 15,
+            'name': 'BM_B',
+        },
+    ]
+}
+
+exp_data = {
+    'benchmarks': [
+        {
+            'PerfCounter_0': 9,
+            'PerfCounter_1': 11,
+            'iterations': 11,
+            'name': 'BM_A',
+        },
+        {
+            'PerfCounter_0': 8,
+            'PerfCounter_1': 10,
+            'iterations': 8,
+            'name': 'BM_A',
+        },
+        {
+            'PerfCounter_0': 62,
+            'PerfCounter_1': 54,
+            'iterations': 14,
+            'name': 'BM_B',
+        },
+    ]
+}
+
+
+class BenchmarkReportConverterTest(absltest.TestCase):
+
+  def test_loading(self):
+    report = benchmark_report.BenchmarkReport(
+        'foo', base_data, ['PerfCounter_0', 'PerfCounter_1'])
+    self.assertEqual(
+        report.values(), {
+            'BM_A': {
+                'PerfCounter_0': [10, 11],
+                'PerfCounter_1': [20, 19]
+            },
+            'BM_B': {
+                'PerfCounter_0': [60],
+                'PerfCounter_1': [50],
+            }
+        })
+    self.assertSetEqual(report.names(), set(['BM_A', 'BM_B']))
+    self.assertSetEqual(report.counters(),
+                        set(['PerfCounter_0', 'PerfCounter_1']))
+    self.assertEqual(
+        report.counter_means('BM_A', 'PerfCounter_0'),
+        (10.488088481701517, 0.7071067811865476))
+
+  def test_summarize_results(self):
+    b_values = benchmark_report.BenchmarkReport(
+        'foo', base_data, ['PerfCounter_0', 'PerfCounter_1'])
+    e_values = benchmark_report.BenchmarkReport(
+        'foo', exp_data, ['PerfCounter_0', 'PerfCounter_1'])
+    summary = benchmark_report.BenchmarkComparison(b_values, e_values)
+    self.assertDictEqual(
+        summary.summarize(), {
+            'BM_A': {
+                'PerfCounter_0': (0.19096016504410973, 0.0674199862463242,
                                  0.08333333333333334),
+                'PerfCounter_1':
+                    (0.4619724131510293, 0.0362738125055006, 0.0674199862463242)
+            },
+            'BM_B': {
+                'PerfCounter_0': (-0.03333333333333366, 0.0, 0.0),
+                'PerfCounter_1': (-0.0800000000000003, 0.0, 0.0)
+            }
+        })
+    self.assertEqual(
+        summary.total_improvement('PerfCounter_0'), 0.08566536243319522)
+
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/compiler_opt/tools/extract_ir.py b/compiler_opt/tools/extract_ir.py
index 92de4d2d..0b924175 100644
--- a/compiler_opt/tools/extract_ir.py
+++ b/compiler_opt/tools/extract_ir.py
@@ -41,7 +41,6 @@
 from absl import flags
 from absl import logging
 
-
 flags.DEFINE_string(
     'input', None,
     'Input file - either compile_commands.json or a linker parameter list')
@@ -98,9 +97,9 @@ def __init__(self, obj_relative_path, output_base_dir, obj_base_dir=None):
     """Set up a TrainingIRExtractor.
 
     Args:
-      obj_relative_path: relative path to the input object file. It will be
-        also used to construct the absolute path of the output IR and cmd
-        files, by appending it to output_base_dir.
+      obj_relative_path: relative path to the input object file. It will be also
+        used to construct the absolute path of the output IR and cmd files, by
+        appending it to output_base_dir.
       output_base_dir: the directory under which the output will be produced.
       obj_base_dir: the base directory for all the input object files.
""" @@ -262,9 +261,7 @@ def main(argv): logging.error('Unknown input type: %s', FLAGS.input_type) pool = multiprocessing.Pool(FLAGS.num_workers) - relative_output_paths = [ - pool.apply(extract_artifacts, (obj,)) for obj in objs - ] + relative_output_paths = pool.map(extract_artifacts, objs) # Write all Non-None relative paths to FLAGS.output_dir/module_paths. with open(os.path.join(FLAGS.output_dir, 'module_paths'), 'w') as f: diff --git a/requirements.txt b/requirements.txt index 740c8af5..15cdcf58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -tensorflow>=2.4.1 -tf-agents>=0.7.1 -gym>=0.18.0 -gin>=0.1.6 -absl-py +tensorflow==2.4.1 +tf-agents==0.7.1 +gym==0.18.0 +gin==0.1.6 +absl-py==0.11.0