Skip to content

Commit 3115d95

Browse files
authored
Merge pull request #4152 from Szelethus/store_primitive_speedup
[store] Unique reports _before_ storing
2 parents e2d7df4 + 5328ded commit 3115d95

File tree

5 files changed

+68
-38
lines changed

5 files changed

+68
-38
lines changed

tools/report-converter/codechecker_report_converter/report/hash.py

-1
Original file line numberDiff line numberDiff line change
@@ -205,5 +205,4 @@ def get_report_path_hash(report: Report) -> str:
205205
if not report_path_hash:
206206
LOG.error('Failed to generate report path hash: %s', report)
207207

208-
LOG.debug(report_path_hash)
209208
return __str_to_hash(report_path_hash)

tools/report-converter/codechecker_report_converter/report/parser/plist.py

+5
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,11 @@ def convert(
519519
self._create_macro_expansion(
520520
macro_expansion, file_index_map))
521521

522+
if report.annotations:
523+
diagnostic["report-annotation"] = dict()
524+
for key, value in report.annotations.items():
525+
diagnostic["report-annotation"][key] = value
526+
522527
data['diagnostics'].append(diagnostic)
523528

524529
return data

web/client/codechecker_client/cmd/store.py

+54-19
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
reports as reports_helper, statistics as report_statistics
3838
from codechecker_report_converter.report.hash import HashType, \
3939
get_report_path_hash
40+
from codechecker_report_converter.report.parser.base import AnalyzerInfo
4041

4142
from codechecker_client import client as libclient
4243
from codechecker_client import product
@@ -404,10 +405,12 @@ def parse_analyzer_result_files(
404405
""" Get reports from the given analyzer result files. """
405406
analyzer_result_file_reports: AnalyzerResultFileReports = defaultdict(list)
406407

407-
for file_path, reports in zip(
408+
for idx, (file_path, reports) in enumerate(zip(
408409
analyzer_result_files, zip_iter(
409410
functools.partial(get_reports, checker_labels=checker_labels),
410-
analyzer_result_files)):
411+
analyzer_result_files))):
412+
LOG.debug(f"[{idx}/{len(analyzer_result_files)}] "
413+
f"Parsed '{file_path}' ...")
411414
analyzer_result_file_reports[file_path] = reports
412415

413416
return analyzer_result_file_reports
@@ -421,8 +424,12 @@ def assemble_zip(inputs,
421424
"""Collect and compress report and source files, together with files
422425
contanining analysis related information into a zip file which
423426
will be sent to the server.
427+
428+
For each report directory, we create a uniqued zipped directory. Each
429+
report directory to store could have been made with different
430+
configurations, so we can't merge them all into a single zip.
424431
"""
425-
files_to_compress = set()
432+
files_to_compress: Dict[str, set] = defaultdict(set)
426433
analyzer_result_file_paths = []
427434
stats = StorageZipStatistics()
428435

@@ -431,21 +438,24 @@ def assemble_zip(inputs,
431438

432439
metadata_file_path = os.path.join(dir_path, 'metadata.json')
433440
if os.path.exists(metadata_file_path):
434-
files_to_compress.add(metadata_file_path)
441+
files_to_compress[os.path.dirname(metadata_file_path)] \
442+
.add(metadata_file_path)
435443

436444
skip_file_path = os.path.join(dir_path, 'skip_file')
437445
if os.path.exists(skip_file_path):
438446
with open(skip_file_path, 'r') as f:
439447
LOG.info("Found skip file %s with the following content:\n%s",
440448
skip_file_path, f.read())
441449

442-
files_to_compress.add(skip_file_path)
450+
files_to_compress[os.path.dirname(skip_file_path)] \
451+
.add(skip_file_path)
443452

444453
review_status_file_path = os.path.join(dir_path, 'review_status.yaml')
445454
if os.path.exists(review_status_file_path):
446-
files_to_compress.add(review_status_file_path)
455+
files_to_compress[os.path.dirname(review_status_file_path)]\
456+
.add(review_status_file_path)
447457

448-
LOG.debug("Processing report files ...")
458+
LOG.debug(f"Processing {len(analyzer_result_file_paths)} report files ...")
449459

450460
with MultiProcessPool() as executor:
451461
analyzer_result_file_reports = parse_analyzer_result_files(
@@ -456,24 +466,43 @@ def assemble_zip(inputs,
456466
changed_files = set()
457467
file_paths = set()
458468
file_report_positions: FileReportPositions = defaultdict(set)
459-
unique_reports = set()
469+
unique_reports: Dict[str, Dict[str, List[Report]]] = defaultdict(dict)
470+
471+
unique_report_hashes = set()
460472
for file_path, reports in analyzer_result_file_reports.items():
461-
files_to_compress.add(file_path)
462473
stats.num_of_analyzer_result_files += 1
463474

464475
for report in reports:
465476
if report.changed_files:
466477
changed_files.update(report.changed_files)
467478
continue
468-
# Need to calculate unique reoirt count to determine report limit
479+
# Unique all bug reports per report directory; also, count how many
480+
# reports we want to store at once to check for the report store
481+
# limit.
469482
report_path_hash = get_report_path_hash(report)
470-
if report_path_hash not in unique_reports:
471-
unique_reports.add(report_path_hash)
483+
if report_path_hash not in unique_report_hashes:
484+
unique_report_hashes.add(report_path_hash)
485+
unique_reports[os.path.dirname(file_path)]\
486+
.setdefault(report.analyzer_name, []) \
487+
.append(report)
472488
stats.add_report(report)
473489

474490
file_paths.update(report.original_files)
475491
file_report_positions[report.file.original_path].add(report.line)
476492

493+
files_to_delete = []
494+
for dirname, analyzer_reports in unique_reports.items():
495+
for analyzer_name, reports in analyzer_reports.items():
496+
if not analyzer_name:
497+
analyzer_name = 'unknown'
498+
_, tmpfile = tempfile.mkstemp(f'-{analyzer_name}.plist')
499+
500+
report_file.create(tmpfile, reports, checker_labels,
501+
AnalyzerInfo(analyzer_name))
502+
LOG.debug(f"Stored '{analyzer_name}' unique reports in {tmpfile}.")
503+
files_to_compress[dirname].add(tmpfile)
504+
files_to_delete.append(tmpfile)
505+
477506
if changed_files:
478507
reports_helper.dump_changed_files(changed_files)
479508
sys.exit(1)
@@ -529,15 +558,17 @@ def assemble_zip(inputs,
529558
LOG.info("Building report zip file...")
530559
with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
531560
# Add the files to the zip which will be sent to the server.
532-
for file_path in files_to_compress:
533-
_, file_name = os.path.split(file_path)
534561

535-
# Create a unique report directory name.
536-
report_dir_name = hashlib.md5(os.path.dirname(
537-
file_path).encode('utf-8')).hexdigest()
562+
for dirname, files in files_to_compress.items():
563+
for file_path in files:
564+
_, file_name = os.path.split(file_path)
538565

539-
zip_target = os.path.join('reports', report_dir_name, file_name)
540-
zipf.write(file_path, zip_target)
566+
# Create a unique report directory name.
567+
report_dir_name = \
568+
hashlib.md5(dirname.encode('utf-8')).hexdigest()
569+
zip_target = \
570+
os.path.join('reports', report_dir_name, file_name)
571+
zipf.write(file_path, zip_target)
541572

542573
collected_file_paths = set()
543574
for f, h in file_to_hash.items():
@@ -611,6 +642,10 @@ def assemble_zip(inputs,
611642
LOG.info("Compressing report zip file done (%s / %s).",
612643
sizeof_fmt(zip_size), sizeof_fmt(compressed_zip_size))
613644

645+
# We are responsible for deleting these.
646+
for file in files_to_delete:
647+
os.remove(file)
648+
614649

615650
def should_be_zipped(input_file: str, input_files: Iterable[str]) -> bool:
616651
"""

web/tests/functional/component/test_component.py

+8-17
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ def setup_method(self, method):
199199
}
200200
]
201201

202+
def teardown_method(self, method):
203+
self.__remove_all_source_componens()
204+
202205
def __add_new_component(self, component):
203206
"""
204207
Creates a new source component.
@@ -226,6 +229,11 @@ def __get_user_defined_source_components(self):
226229
return [c for c in components
227230
if GEN_OTHER_COMPONENT_NAME not in c.name]
228231

232+
def __remove_all_source_componens(self):
233+
print(self.__get_user_defined_source_components())
234+
for component in self.__get_user_defined_source_components():
235+
self.__remove_source_component(component.name)
236+
229237
def __test_other_component(self, components, excluded_from_other,
230238
included_in_other=None):
231239
"""
@@ -325,8 +333,6 @@ def test_filter_report_by_component(self):
325333
r.checkedFile.endswith('null_dereference.cpp')]
326334
self.assertEqual(len(divide_zero_reports), 0)
327335

328-
self.__remove_source_component(test_component['name'])
329-
330336
def test_filter_report_by_complex_component(self):
331337
"""
332338
Test report filter by complex component which includes and excludes
@@ -375,8 +381,6 @@ def test_filter_report_by_complex_component(self):
375381
r.checkedFile.endswith('path_begin.cpp')]
376382
self.assertEqual(len(divide_zero_reports), 0)
377383

378-
self.__remove_source_component(test_component['name'])
379-
380384
def test_filter_report_by_multiple_components(self):
381385
"""
382386
Test report filter by multiple components.
@@ -425,9 +429,6 @@ def test_filter_report_by_multiple_components(self):
425429
r.checkedFile.endswith('call_and_message.cpp')]
426430
self.assertEqual(len(call_and_msg_reports), 0)
427431

428-
self.__remove_source_component(test_component1['name'])
429-
self.__remove_source_component(test_component2['name'])
430-
431432
def test_filter_report_by_excluding_all_results_component(self):
432433
"""
433434
Test report filter by component which excludes all reports.
@@ -452,8 +453,6 @@ def test_filter_report_by_excluding_all_results_component(self):
452453
# No reports for this component.
453454
self.assertEqual(len(run_results), 0)
454455

455-
self.__remove_source_component(test_component['name'])
456-
457456
def test_component_name_with_whitespaces(self):
458457
"""
459458
Creates a new component which contains white spaces and removes it at
@@ -462,7 +461,6 @@ def test_component_name_with_whitespaces(self):
462461
test_component = self.components[1]
463462

464463
self.__add_new_component(test_component)
465-
self.__remove_source_component(test_component['name'])
466464

467465
def test_no_user_defined_component(self):
468466
"""
@@ -496,7 +494,6 @@ def test_other_with_single_user_defined_component(self):
496494

497495
excluded_from_other = ['divide_zero.cpp', 'new_delete.cpp']
498496
self.__test_other_component([component], excluded_from_other)
499-
self.__remove_source_component(component['name'])
500497

501498
def test_other_with_multiple_user_defined_component(self):
502499
"""
@@ -540,9 +537,6 @@ def test_other_with_multiple_user_defined_component(self):
540537
self.__test_other_component(components, excluded_from_other,
541538
included_in_other)
542539

543-
for c in components:
544-
self.__remove_source_component(c['name'])
545-
546540
def test_component_anywhere_on_path(self):
547541
"""
548542
Check "anywhere on report path" feature. With this flag one can query
@@ -589,6 +583,3 @@ def test_component_anywhere_on_path(self):
589583
self.assertEqual(len(component_results), 1)
590584
self.assertTrue(
591585
component_results[0].checkedFile.endswith('path_end.h'))
592-
593-
for c in components:
594-
self.__remove_source_component(c['name'])

web/tests/functional/dynamic_results/test_dynamic_results.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from libtest import env
2828

2929

30-
class DiffRemote(unittest.TestCase):
30+
class DynamicResults(unittest.TestCase):
3131

3232
def setup_class(self):
3333
"""Setup the environment for testing dynamic_results."""

0 commit comments

Comments
 (0)