Skip to content

Commit

Permalink
Summary statistics for checkers
Browse files Browse the repository at this point in the history
It can be used to see the number of reports per checker for the 'parse' command. Before storing, it is helpful to verify with the '--summary' flag which checkers are generating too many reports in a large report directory. The result is a table with a checker-name column and a number-of-reports column, sorted in descending order.
Example command: 'CodeChecker parse reports/ --summary'
The result table would be:
---==== Checkers Summary Statistics ====----
-------------------------------------------------------------------------------
Checker name                                                | Number of reports
-------------------------------------------------------------------------------
readability-avoid-const-params-in-decls                     |            266836
modernize-use-trailing-return-type                          |            255116
readability-magic-numbers                                   |            138216
modernize-avoid-c-arrays                                    |            116741
-------------------------------------------------------------------------------
----=================----
  • Loading branch information
cservakt committed Sep 22, 2023
1 parent 90464ac commit 59b1ddb
Showing 1 changed file with 104 additions and 37 deletions.
141 changes: 104 additions & 37 deletions analyzer/codechecker_analyzer/cmd/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import os
import sys
from typing import Dict, Optional, Set
import subprocess

from codechecker_report_converter.util import dump_json_output
from codechecker_report_converter.report import report_file, \
Expand All @@ -34,6 +35,7 @@
from codechecker_common.skiplist_handler import SkipListHandler, \
SkipListHandlers
from codechecker_common.util import load_json
from codechecker_report_converter import twodim


LOG = logger.get_logger('system')
Expand Down Expand Up @@ -231,6 +233,18 @@ def add_arguments_to_parser(parser):
"/a/x.cpp and /a/y.cpp then \"/a/*.cpp\" "
"selects both.")

parser.add_argument('--summary',
dest="summary",
required=False,
default=argparse.SUPPRESS,
action='store_true',
help="Statistics for checkers. "
"It can be used to see "
"the number of findings per checker. "
"It is helpful to verify "
"which checkers are generating too many reports "
"in a large report directory.")

logger.add_verbose_arguments(parser)
parser.set_defaults(
func=main, func_process_config_file=cmd_config.process_config_file)
Expand Down Expand Up @@ -383,6 +397,7 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
processed_path_hashes = set()
processed_file_paths = set()
print_steps = 'print_steps' in args
checker_stats = {}

html_builder: Optional[report_to_html.HtmlBuilder] = None
if export == 'html':
Expand All @@ -393,55 +408,107 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
for dir_path, file_paths in report_file.analyzer_result_files(args.input):
metadata = get_metadata(dir_path)
for file_path in file_paths:
reports = report_file.get_reports(
file_path, context.checker_labels, file_cache)

reports = reports_helper.skip(
reports, processed_path_hashes, skip_handlers, suppr_handler,
src_comment_status_filter)

statistics.num_of_analyzer_result_files += 1
for report in reports:
if report.changed_files:
changed_files.update(report.changed_files)

statistics.add_report(report)

if trim_path_prefixes:
report.trim_path_prefixes(trim_path_prefixes)

all_reports.extend(reports)

# Print reports continously.
if not export:
file_report_map = plaintext.get_file_report_map(
reports, file_path, metadata)
plaintext.convert(
file_report_map, processed_file_paths, print_steps)
elif export == 'html':
print(f"Parsing input file '{file_path}'.")
report_to_html.convert(
file_path, reports, output_dir_path,
html_builder)

if export is None: # Plain text output
if 'summary' in args:
command = f"grep -A1 '<key>check_name</key>' {file_path} | " \
"grep '<string>' " \
r"| sed -E 's/<string>([^<]+)<\/string>/\1/'" \
if os.path.splitext(
os.path.basename(file_path))[1] == ".plist" \
else None

if not command:
LOG.warning(f"{file_path} file \
has an unsupported file type")
continue

try:
subp_out = subprocess.check_output(
command,
stderr=subprocess.STDOUT,
shell=True,
universal_newlines=True
)
if subp_out:
checker_names = [line.strip()
for line
in subp_out.strip().split("\n")]
for checker_name in checker_names:
checker_stats[checker_name] = checker_stats[
checker_name] + 1 \
if checker_name in checker_stats else 1
except subprocess.CalledProcessError as e:
# If the return code of the grep subprocess is one,
# the report file does not match the regular expression
if e.returncode == 1:
continue
else:
LOG.error(f"Subprocess problem occurred in \
{file_path} file - {e}")
sys.exit(1)
else:
reports = report_file.get_reports(
file_path, context.checker_labels, file_cache)

reports = reports_helper.skip(
reports, processed_path_hashes, skip_handlers,
suppr_handler, src_comment_status_filter)

statistics.num_of_analyzer_result_files += 1
for report in reports:
if report.changed_files:
changed_files.update(report.changed_files)

statistics.add_report(report)

if trim_path_prefixes:
report.trim_path_prefixes(trim_path_prefixes)

all_reports.extend(reports)

# Print reports continously.
if not export:
file_report_map = plaintext.get_file_report_map(
reports, file_path, metadata)
plaintext.convert(
file_report_map, processed_file_paths, print_steps)
elif export == 'html':
print(f"Parsing input file '{file_path}'.")
report_to_html.convert(
file_path, reports, output_dir_path,
html_builder)

if 'summary' in args and export is None:
sys.stdout.write("\n----==== Checkers Summary Statistics ====----\n")
header = ["Checker name", "Number of reports"]
sys.stdout.write(twodim.to_table([header] + [
(checker_naem, number_of_report)
for (checker_naem, number_of_report)
in sorted(
checker_stats.items(),
key=lambda item:item[1],
reverse=True
)]))
sys.stdout.write("\n----=================----\n")
elif export is None and 'summary' not in args: # Plain text output
statistics.write()
elif export == 'html':
elif export == 'html' and 'summary' not in args:
html_builder.finish(output_dir_path, statistics)
elif export == 'json':
elif export == 'json' and 'summary' not in args:
data = report_to_json.convert(all_reports)
dump_json_output(data, get_output_file_path("reports.json"))
elif export == 'codeclimate':
elif export == 'codeclimate' and 'summary' not in args:
data = codeclimate.convert(all_reports)
dump_json_output(data, get_output_file_path("reports.json"))
elif export == 'gerrit':
elif export == 'gerrit' and 'summary' not in args:
data = gerrit.convert(all_reports)
dump_json_output(data, get_output_file_path("reports.json"))
elif export == 'baseline':
elif export == 'baseline' and 'summary' not in args:
data = baseline.convert(all_reports)
output_path = get_output_file_path("reports.baseline")
if output_path:
baseline.write(output_path, data)
else:
LOG.error("Summary and export flags cannot be given at the same time.")

reports_helper.dump_changed_files(changed_files)

Expand Down

0 comments on commit 59b1ddb

Please sign in to comment.