Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[store] Unique reports _before_ storing #4152

Merged
merged 4 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -205,5 +205,4 @@ def get_report_path_hash(report: Report) -> str:
if not report_path_hash:
LOG.error('Failed to generate report path hash: %s', report)

LOG.debug(report_path_hash)
return __str_to_hash(report_path_hash)
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,11 @@ def convert(
self._create_macro_expansion(
macro_expansion, file_index_map))

if report.annotations:
diagnostic["report-annotation"] = dict()
for key, value in report.annotations.items():
diagnostic["report-annotation"][key] = value

data['diagnostics'].append(diagnostic)

return data
Expand Down
73 changes: 54 additions & 19 deletions web/client/codechecker_client/cmd/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
reports as reports_helper, statistics as report_statistics
from codechecker_report_converter.report.hash import HashType, \
get_report_path_hash
from codechecker_report_converter.report.parser.base import AnalyzerInfo

from codechecker_client import client as libclient
from codechecker_client import product
Expand Down Expand Up @@ -404,10 +405,12 @@ def parse_analyzer_result_files(
""" Get reports from the given analyzer result files. """
analyzer_result_file_reports: AnalyzerResultFileReports = defaultdict(list)

for file_path, reports in zip(
for idx, (file_path, reports) in enumerate(zip(
analyzer_result_files, zip_iter(
functools.partial(get_reports, checker_labels=checker_labels),
analyzer_result_files)):
analyzer_result_files))):
LOG.debug(f"[{idx}/{len(analyzer_result_files)}] "
f"Parsed '{file_path}' ...")
analyzer_result_file_reports[file_path] = reports

return analyzer_result_file_reports
Expand All @@ -421,8 +424,12 @@ def assemble_zip(inputs,
"""Collect and compress report and source files, together with files
containing analysis-related information into a zip file which
will be sent to the server.

For each report directory, we create a uniqued zipped directory. Each
report directory to store could have been made with different
configurations, so we can't merge them all into a single zip.
"""
files_to_compress = set()
files_to_compress: Dict[str, set] = defaultdict(set)
analyzer_result_file_paths = []
stats = StorageZipStatistics()

Expand All @@ -431,21 +438,24 @@ def assemble_zip(inputs,

metadata_file_path = os.path.join(dir_path, 'metadata.json')
if os.path.exists(metadata_file_path):
files_to_compress.add(metadata_file_path)
files_to_compress[os.path.dirname(metadata_file_path)] \
.add(metadata_file_path)

skip_file_path = os.path.join(dir_path, 'skip_file')
if os.path.exists(skip_file_path):
with open(skip_file_path, 'r') as f:
LOG.info("Found skip file %s with the following content:\n%s",
skip_file_path, f.read())

files_to_compress.add(skip_file_path)
files_to_compress[os.path.dirname(skip_file_path)] \
.add(skip_file_path)

review_status_file_path = os.path.join(dir_path, 'review_status.yaml')
if os.path.exists(review_status_file_path):
files_to_compress.add(review_status_file_path)
files_to_compress[os.path.dirname(review_status_file_path)]\
.add(review_status_file_path)

LOG.debug("Processing report files ...")
LOG.debug(f"Processing {len(analyzer_result_file_paths)} report files ...")

with MultiProcessPool() as executor:
analyzer_result_file_reports = parse_analyzer_result_files(
Expand All @@ -456,24 +466,43 @@ def assemble_zip(inputs,
changed_files = set()
file_paths = set()
file_report_positions: FileReportPositions = defaultdict(set)
unique_reports = set()
unique_reports: Dict[str, Dict[str, List[Report]]] = defaultdict(dict)

unique_report_hashes = set()
for file_path, reports in analyzer_result_file_reports.items():
files_to_compress.add(file_path)
stats.num_of_analyzer_result_files += 1

for report in reports:
if report.changed_files:
changed_files.update(report.changed_files)
continue
# Need to calculate unique report count to determine report limit
# Unique all bug reports per report directory; also, count how many
# reports we want to store at once to check for the report store
# limit.
report_path_hash = get_report_path_hash(report)
if report_path_hash not in unique_reports:
unique_reports.add(report_path_hash)
if report_path_hash not in unique_report_hashes:
unique_report_hashes.add(report_path_hash)
unique_reports[os.path.dirname(file_path)]\
.setdefault(report.analyzer_name, []) \
.append(report)
stats.add_report(report)

file_paths.update(report.original_files)
file_report_positions[report.file.original_path].add(report.line)

files_to_delete = []
for dirname, analyzer_reports in unique_reports.items():
for analyzer_name, reports in analyzer_reports.items():
if not analyzer_name:
analyzer_name = 'unknown'
_, tmpfile = tempfile.mkstemp(f'-{analyzer_name}.plist')

report_file.create(tmpfile, reports, checker_labels,
AnalyzerInfo(analyzer_name))
LOG.debug(f"Stored '{analyzer_name}' unique reports in {tmpfile}.")
files_to_compress[dirname].add(tmpfile)
files_to_delete.append(tmpfile)

if changed_files:
reports_helper.dump_changed_files(changed_files)
sys.exit(1)
Expand Down Expand Up @@ -529,15 +558,17 @@ def assemble_zip(inputs,
LOG.info("Building report zip file...")
with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
# Add the files to the zip which will be sent to the server.
for file_path in files_to_compress:
_, file_name = os.path.split(file_path)

# Create a unique report directory name.
report_dir_name = hashlib.md5(os.path.dirname(
file_path).encode('utf-8')).hexdigest()
for dirname, files in files_to_compress.items():
for file_path in files:
_, file_name = os.path.split(file_path)

zip_target = os.path.join('reports', report_dir_name, file_name)
zipf.write(file_path, zip_target)
# Create a unique report directory name.
report_dir_name = \
hashlib.md5(dirname.encode('utf-8')).hexdigest()
zip_target = \
os.path.join('reports', report_dir_name, file_name)
zipf.write(file_path, zip_target)

collected_file_paths = set()
for f, h in file_to_hash.items():
Expand Down Expand Up @@ -611,6 +642,10 @@ def assemble_zip(inputs,
LOG.info("Compressing report zip file done (%s / %s).",
sizeof_fmt(zip_size), sizeof_fmt(compressed_zip_size))

# We are responsible for deleting these.
for file in files_to_delete:
os.remove(file)


def should_be_zipped(input_file: str, input_files: Iterable[str]) -> bool:
"""
Expand Down
25 changes: 8 additions & 17 deletions web/tests/functional/component/test_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ def setup_method(self, method):
}
]

def teardown_method(self, method):
    # Runs after every test (pytest xunit-style teardown): drop all
    # user-defined source components so tests do not leak state into
    # each other. NOTE(review): the callee's name carries a typo
    # ("componens") — kept as-is, renaming is out of scope here.
    self.__remove_all_source_componens()

def __add_new_component(self, component):
"""
Creates a new source component.
Expand Down Expand Up @@ -226,6 +229,11 @@ def __get_user_defined_source_components(self):
return [c for c in components
if GEN_OTHER_COMPONENT_NAME not in c.name]

def __remove_all_source_componens(self):
    """Remove every user-defined source component from the server.

    Used by the per-test teardown so that components created by one
    test case cannot affect the next one.

    NOTE(review): the method name has a typo ("componens"); it is kept
    unchanged so existing callers keep working.
    """
    # Fetch the component list once (the original fetched it twice and
    # printed it as leftover debug output).
    for component in self.__get_user_defined_source_components():
        self.__remove_source_component(component.name)

def __test_other_component(self, components, excluded_from_other,
included_in_other=None):
"""
Expand Down Expand Up @@ -325,8 +333,6 @@ def test_filter_report_by_component(self):
r.checkedFile.endswith('null_dereference.cpp')]
self.assertEqual(len(divide_zero_reports), 0)

self.__remove_source_component(test_component['name'])

def test_filter_report_by_complex_component(self):
"""
Test report filter by complex component which includes and excludes
Expand Down Expand Up @@ -375,8 +381,6 @@ def test_filter_report_by_complex_component(self):
r.checkedFile.endswith('path_begin.cpp')]
self.assertEqual(len(divide_zero_reports), 0)

self.__remove_source_component(test_component['name'])

def test_filter_report_by_multiple_components(self):
"""
Test report filter by multiple components.
Expand Down Expand Up @@ -425,9 +429,6 @@ def test_filter_report_by_multiple_components(self):
r.checkedFile.endswith('call_and_message.cpp')]
self.assertEqual(len(call_and_msg_reports), 0)

self.__remove_source_component(test_component1['name'])
self.__remove_source_component(test_component2['name'])

def test_filter_report_by_excluding_all_results_component(self):
"""
Test report filter by component which excludes all reports.
Expand All @@ -452,8 +453,6 @@ def test_filter_report_by_excluding_all_results_component(self):
# No reports for this component.
self.assertEqual(len(run_results), 0)

self.__remove_source_component(test_component['name'])

def test_component_name_with_whitespaces(self):
"""
Creates a new component which contains white spaces and removes it at
Expand All @@ -462,7 +461,6 @@ def test_component_name_with_whitespaces(self):
test_component = self.components[1]

self.__add_new_component(test_component)
self.__remove_source_component(test_component['name'])

def test_no_user_defined_component(self):
"""
Expand Down Expand Up @@ -496,7 +494,6 @@ def test_other_with_single_user_defined_component(self):

excluded_from_other = ['divide_zero.cpp', 'new_delete.cpp']
self.__test_other_component([component], excluded_from_other)
self.__remove_source_component(component['name'])

def test_other_with_multiple_user_defined_component(self):
"""
Expand Down Expand Up @@ -540,9 +537,6 @@ def test_other_with_multiple_user_defined_component(self):
self.__test_other_component(components, excluded_from_other,
included_in_other)

for c in components:
self.__remove_source_component(c['name'])

def test_component_anywhere_on_path(self):
"""
Check "anywhere on report path" feature. With this flag one can query
Expand Down Expand Up @@ -589,6 +583,3 @@ def test_component_anywhere_on_path(self):
self.assertEqual(len(component_results), 1)
self.assertTrue(
component_results[0].checkedFile.endswith('path_end.h'))

for c in components:
self.__remove_source_component(c['name'])
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from libtest import env


class DiffRemote(unittest.TestCase):
class DynamicResults(unittest.TestCase):

def setup_class(self):
"""Setup the environment for testing dynamic_results."""
Expand Down
Loading