Merge pull request #4152 from Szelethus/store_primitive_speedup

bruntib · web-flow · commit 3115d952128e · 2024-04-02T13:42:56.000+02:00
[store] Unique reports _before_ storing
diff --git a/tools/report-converter/codechecker_report_converter/report/hash.py b/tools/report-converter/codechecker_report_converter/report/hash.py
@@ -205,5 +205,4 @@ def get_report_path_hash(report: Report) -> str:
     if not report_path_hash:
         LOG.error('Failed to generate report path hash: %s', report)
 
-    LOG.debug(report_path_hash)
     return __str_to_hash(report_path_hash)
diff --git a/tools/report-converter/codechecker_report_converter/report/parser/plist.py b/tools/report-converter/codechecker_report_converter/report/parser/plist.py
@@ -519,6 +519,11 @@ def convert(
                         self._create_macro_expansion(
                             macro_expansion, file_index_map))
 
+            if report.annotations:
+                diagnostic["report-annotation"] = dict()
+                for key, value in report.annotations.items():
+                    diagnostic["report-annotation"][key] = value
+
             data['diagnostics'].append(diagnostic)
 
         return data
diff --git a/web/client/codechecker_client/cmd/store.py b/web/client/codechecker_client/cmd/store.py
@@ -37,6 +37,7 @@
     reports as reports_helper, statistics as report_statistics
 from codechecker_report_converter.report.hash import HashType, \
     get_report_path_hash
+from codechecker_report_converter.report.parser.base import AnalyzerInfo
 
 from codechecker_client import client as libclient
 from codechecker_client import product
@@ -404,10 +405,12 @@ def parse_analyzer_result_files(
     """ Get reports from the given analyzer result files. """
     analyzer_result_file_reports: AnalyzerResultFileReports = defaultdict(list)
 
-    for file_path, reports in zip(
+    for idx, (file_path, reports) in enumerate(zip(
             analyzer_result_files, zip_iter(
                 functools.partial(get_reports, checker_labels=checker_labels),
-                analyzer_result_files)):
+                analyzer_result_files))):
+        LOG.debug(f"[{idx}/{len(analyzer_result_files)}] "
+                  f"Parsed '{file_path}' ...")
         analyzer_result_file_reports[file_path] = reports
 
     return analyzer_result_file_reports
@@ -421,8 +424,12 @@ def assemble_zip(inputs,
     """Collect and compress report and source files, together with files
     contanining analysis related information into a zip file which
     will be sent to the server.
+
+    For each report directory, we create a uniqued zipped directory. Each
+    report directory to store could have been made with different
+    configurations, so we can't merge them all into a single zip.
     """
-    files_to_compress = set()
+    files_to_compress: Dict[str, set] = defaultdict(set)
     analyzer_result_file_paths = []
     stats = StorageZipStatistics()
 
@@ -431,21 +438,24 @@ def assemble_zip(inputs,
 
         metadata_file_path = os.path.join(dir_path, 'metadata.json')
         if os.path.exists(metadata_file_path):
-            files_to_compress.add(metadata_file_path)
+            files_to_compress[os.path.dirname(metadata_file_path)] \
+                .add(metadata_file_path)
 
         skip_file_path = os.path.join(dir_path, 'skip_file')
         if os.path.exists(skip_file_path):
             with open(skip_file_path, 'r') as f:
                 LOG.info("Found skip file %s with the following content:\n%s",
                          skip_file_path, f.read())
 
-            files_to_compress.add(skip_file_path)
+            files_to_compress[os.path.dirname(skip_file_path)] \
+                .add(skip_file_path)
 
         review_status_file_path = os.path.join(dir_path, 'review_status.yaml')
         if os.path.exists(review_status_file_path):
-            files_to_compress.add(review_status_file_path)
+            files_to_compress[os.path.dirname(review_status_file_path)]\
+                .add(review_status_file_path)
 
-    LOG.debug("Processing report files ...")
+    LOG.debug(f"Processing {len(analyzer_result_file_paths)} report files ...")
 
     with MultiProcessPool() as executor:
         analyzer_result_file_reports = parse_analyzer_result_files(
@@ -456,24 +466,43 @@ def assemble_zip(inputs,
     changed_files = set()
     file_paths = set()
     file_report_positions: FileReportPositions = defaultdict(set)
-    unique_reports = set()
+    unique_reports: Dict[str, Dict[str, List[Report]]] = defaultdict(dict)
+
+    unique_report_hashes = set()
     for file_path, reports in analyzer_result_file_reports.items():
-        files_to_compress.add(file_path)
         stats.num_of_analyzer_result_files += 1
 
         for report in reports:
             if report.changed_files:
                 changed_files.update(report.changed_files)
                 continue
-            # Need to calculate unique reoirt count to determine report limit
+            # Unique all bug reports per report directory; also, count how many
+            # reports we want to store at once to check for the report store
+            # limit.
             report_path_hash = get_report_path_hash(report)
-            if report_path_hash not in unique_reports:
-                unique_reports.add(report_path_hash)
+            if report_path_hash not in unique_report_hashes:
+                unique_report_hashes.add(report_path_hash)
+                unique_reports[os.path.dirname(file_path)]\
+                    .setdefault(report.analyzer_name, []) \
+                    .append(report)
                 stats.add_report(report)
 
             file_paths.update(report.original_files)
             file_report_positions[report.file.original_path].add(report.line)
 
+    files_to_delete = []
+    for dirname, analyzer_reports in unique_reports.items():
+        for analyzer_name, reports in analyzer_reports.items():
+            if not analyzer_name:
+                analyzer_name = 'unknown'
+            _, tmpfile = tempfile.mkstemp(f'-{analyzer_name}.plist')
+
+            report_file.create(tmpfile, reports, checker_labels,
+                               AnalyzerInfo(analyzer_name))
+            LOG.debug(f"Stored '{analyzer_name}' unique reports in {tmpfile}.")
+            files_to_compress[dirname].add(tmpfile)
+            files_to_delete.append(tmpfile)
+
     if changed_files:
         reports_helper.dump_changed_files(changed_files)
         sys.exit(1)
@@ -529,15 +558,17 @@ def assemble_zip(inputs,
     LOG.info("Building report zip file...")
     with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
         # Add the files to the zip which will be sent to the server.
-        for file_path in files_to_compress:
-            _, file_name = os.path.split(file_path)
 
-            # Create a unique report directory name.
-            report_dir_name = hashlib.md5(os.path.dirname(
-                file_path).encode('utf-8')).hexdigest()
+        for dirname, files in files_to_compress.items():
+            for file_path in files:
+                _, file_name = os.path.split(file_path)
 
-            zip_target = os.path.join('reports', report_dir_name, file_name)
-            zipf.write(file_path, zip_target)
+                # Create a unique report directory name.
+                report_dir_name = \
+                    hashlib.md5(dirname.encode('utf-8')).hexdigest()
+                zip_target = \
+                    os.path.join('reports', report_dir_name, file_name)
+                zipf.write(file_path, zip_target)
 
         collected_file_paths = set()
         for f, h in file_to_hash.items():
@@ -611,6 +642,10 @@ def assemble_zip(inputs,
     LOG.info("Compressing report zip file done (%s / %s).",
              sizeof_fmt(zip_size), sizeof_fmt(compressed_zip_size))
 
+    # We are responsible for deleting these.
+    for file in files_to_delete:
+        os.remove(file)
+
 
 def should_be_zipped(input_file: str, input_files: Iterable[str]) -> bool:
     """
diff --git a/web/tests/functional/component/test_component.py b/web/tests/functional/component/test_component.py
@@ -199,6 +199,9 @@ def setup_method(self, method):
             }
         ]
 
+    def teardown_method(self, method):
+        self.__remove_all_source_componens()
+
     def __add_new_component(self, component):
         """
         Creates a new source component.
@@ -226,6 +229,11 @@ def __get_user_defined_source_components(self):
         return [c for c in components
                 if GEN_OTHER_COMPONENT_NAME not in c.name]
 
+    def __remove_all_source_componens(self):
+        print(self.__get_user_defined_source_components())
+        for component in self.__get_user_defined_source_components():
+            self.__remove_source_component(component.name)
+
     def __test_other_component(self, components, excluded_from_other,
                                included_in_other=None):
         """
@@ -325,8 +333,6 @@ def test_filter_report_by_component(self):
                                r.checkedFile.endswith('null_dereference.cpp')]
         self.assertEqual(len(divide_zero_reports), 0)
 
-        self.__remove_source_component(test_component['name'])
-
     def test_filter_report_by_complex_component(self):
         """
         Test report filter by complex component which includes and excludes
@@ -375,8 +381,6 @@ def test_filter_report_by_complex_component(self):
                                r.checkedFile.endswith('path_begin.cpp')]
         self.assertEqual(len(divide_zero_reports), 0)
 
-        self.__remove_source_component(test_component['name'])
-
     def test_filter_report_by_multiple_components(self):
         """
         Test report filter by multiple components.
@@ -425,9 +429,6 @@ def test_filter_report_by_multiple_components(self):
                                 r.checkedFile.endswith('call_and_message.cpp')]
         self.assertEqual(len(call_and_msg_reports), 0)
 
-        self.__remove_source_component(test_component1['name'])
-        self.__remove_source_component(test_component2['name'])
-
     def test_filter_report_by_excluding_all_results_component(self):
         """
         Test report filter by component which excludes all reports.
@@ -452,8 +453,6 @@ def test_filter_report_by_excluding_all_results_component(self):
         # No reports for this component.
         self.assertEqual(len(run_results), 0)
 
-        self.__remove_source_component(test_component['name'])
-
     def test_component_name_with_whitespaces(self):
         """
         Creates a new component which contains white spaces and removes it at
@@ -462,7 +461,6 @@ def test_component_name_with_whitespaces(self):
         test_component = self.components[1]
 
         self.__add_new_component(test_component)
-        self.__remove_source_component(test_component['name'])
 
     def test_no_user_defined_component(self):
         """
@@ -496,7 +494,6 @@ def test_other_with_single_user_defined_component(self):
 
         excluded_from_other = ['divide_zero.cpp', 'new_delete.cpp']
         self.__test_other_component([component], excluded_from_other)
-        self.__remove_source_component(component['name'])
 
     def test_other_with_multiple_user_defined_component(self):
         """
@@ -540,9 +537,6 @@ def test_other_with_multiple_user_defined_component(self):
         self.__test_other_component(components, excluded_from_other,
                                     included_in_other)
 
-        for c in components:
-            self.__remove_source_component(c['name'])
-
     def test_component_anywhere_on_path(self):
         """
         Check "anywhere on report path" feature. With this flag one can query
@@ -589,6 +583,3 @@ def test_component_anywhere_on_path(self):
         self.assertEqual(len(component_results), 1)
         self.assertTrue(
             component_results[0].checkedFile.endswith('path_end.h'))
-
-        for c in components:
-            self.__remove_source_component(c['name'])
diff --git a/web/tests/functional/dynamic_results/test_dynamic_results.py b/web/tests/functional/dynamic_results/test_dynamic_results.py
@@ -27,7 +27,7 @@
 from libtest import env
 
 
-class DiffRemote(unittest.TestCase):
+class DynamicResults(unittest.TestCase):
 
     def setup_class(self):
         """Setup the environment for testing dynamic_results."""