Commit 3122cb6
Refactor rules and commit with features, collect runtime stats (#233, #242)

* collect runtime stats (by Gergő Balogh, aka geryxyz) #233
* refactor: drop CommitWithFeatures #242
copernico authored Sep 15, 2021
1 parent a5916f2 commit 3122cb6
Showing 39 changed files with 1,366 additions and 546 deletions.
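
The runtime-statistics work from #233 follows a single pattern across the diffs below: code sections are wrapped in ExecutionTimer context managers that record into named sub-collections of a module-level execution_statistics object, which is later rendered as a console tree or as an HTML list. A minimal sketch of that pattern, using only names that appear in this commit (the timed body and the surrounding function are placeholders, not code from the repository):

from simple_hierarchical_storage.execution import ExecutionTimer, execution_statistics

def timed_step():
    # Time this block and record the measurement under a named sub-collection.
    with ExecutionTimer(execution_statistics.sub_collection(name="core")):
        pass  # placeholder for the work being measured

timed_step()
# Render what was collected: a tree for console output, a <ul> fragment for HTML reports.
print(execution_statistics.generate_console_tree())
html_fragment = execution_statistics.as_html_ul()
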
1 change: 1 addition & 0 deletions .gitignore
@@ -50,3 +50,4 @@ prospector/prospector-report.html
prospector/test_report.html
prospector/.idea/*
similarities.csv
prospector/demo_ul.html
12 changes: 6 additions & 6 deletions prospector/client/cli/console_report.py
@@ -1,14 +1,14 @@
import log.util
from datamodel.advisory import AdvisoryRecord
from datamodel.commit_features import CommitWithFeatures
from datamodel.commit import Commit

_logger = log.util.init_local_logger()


def report_on_console(
results: "list[CommitWithFeatures]", advisory_record: AdvisoryRecord, verbose=False
results: "list[Commit]", advisory_record: AdvisoryRecord, verbose=False
):
def format_annotations(commit: CommitWithFeatures) -> str:
def format_annotations(commit: Commit) -> str:
out = ""
if verbose:
for tag in commit.annotations:
@@ -25,9 +25,9 @@ def format_annotations(commit: CommitWithFeatures) -> str:
for commit in results:
count += 1
print(
f"\n----------\n{commit.commit.repository}/commit/{commit.commit.commit_id}\n"
+ "\n".join(commit.commit.changed_files)
+ f"{commit.commit.message}\n{format_annotations(commit)}"
f"\n----------\n{commit.repository}/commit/{commit.commit_id}\n"
+ "\n".join(commit.changed_files)
+ f"{commit.message}\n{format_annotations(commit)}"
)

print(f"Found {count} candidates\nAdvisory record\n{advisory_record}")
15 changes: 10 additions & 5 deletions prospector/client/cli/html_report.py
@@ -5,20 +5,22 @@

import log.util
from datamodel.advisory import AdvisoryRecord
from datamodel.commit_features import CommitWithFeatures
from datamodel.commit import Commit
from simple_hierarchical_storage.execution import execution_statistics

_logger = log.util.init_local_logger()


def report_as_html(
results: List[CommitWithFeatures],
results: List[Commit],
advisory_record: AdvisoryRecord,
filename: str = "prospector-report.html",
statistics=None,
):
annotations_count = {}
commit_with_feature: CommitWithFeatures
for commit_with_feature in results:
for annotation in commit_with_feature.annotations.keys():
annotated_commit: Commit
for annotated_commit in results:
for annotation in annotated_commit.annotations.keys():
annotations_count[annotation] = annotations_count.get(annotation, 0) + 1

_logger.info("Writing results to " + filename)
@@ -32,6 +34,9 @@ def report_as_html(
candidates=results,
present_annotations=annotations_count,
advisory_record=advisory_record,
execution_statistics=(
execution_statistics if statistics is None else statistics
).as_html_ul(),
):
html_file.write(content)
return filename
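
The statistics parameter above defaults to the module-level execution_statistics collection; passing an explicit collection replaces it in the rendered report. A hedged usage sketch, assuming an empty candidate list, assuming the remaining AdvisoryRecord fields have defaults, and reusing the sample_statistics helper imported by the test further down in this commit:

from client.cli.html_report import report_as_html
from datamodel.advisory import AdvisoryRecord
from util.sample_data_generation import sample_statistics

# Hypothetical inputs: no candidates and a made-up vulnerability id.
results = []
advisory_record = AdvisoryRecord(vulnerability_id="CVE-2021-0000")

# Default: embed the globally collected execution statistics in the report.
report_as_html(results, advisory_record)

# Override: embed an explicit statistics collection instead, as the test does.
report_as_html(
    results, advisory_record, filename="test_report.html", statistics=sample_statistics()
)
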
46 changes: 20 additions & 26 deletions prospector/client/cli/html_report_test.py
@@ -5,8 +5,7 @@
from client.cli.html_report import report_as_html
from datamodel.advisory import AdvisoryRecord
from datamodel.commit import Commit
from datamodel.commit_features import CommitWithFeatures
from util.sample_data_generation import (
from util.sample_data_generation import ( # random_list_of_url,
random_bool,
random_commit_hash,
random_dict_of_strs,
@@ -16,39 +15,32 @@
random_list_of_jira_refs,
random_list_of_path,
random_list_of_strs,
random_list_of_url,
random_list_of_version,
random_url,
sample_statistics,
)


def test_report_generation():
candidates = []
for _ in range(100):
commit_with_feature = CommitWithFeatures(
commit=Commit(
commit_id=random_commit_hash(),
repository=random_url(4),
message=" ".join(random_list_of_strs(100)),
timestamp=randint(0, 100000),
hunks=random_list_of_hunks(1000, 42),
diff=random_list_of_strs(200),
changed_files=random_list_of_path(4, 42),
message_reference_content=random_list_of_strs(42),
jira_refs=random_list_of_jira_refs(42),
ghissue_refs=random_list_of_github_issue_ids(100000, 42),
cve_refs=random_list_of_cve(42),
tags=random_list_of_strs(42),
),
references_vuln_id=random_bool(),
time_between_commit_and_advisory_record=randint(0, 42),
changes_relevant_path=set(random_list_of_path(4, 42)),
other_CVE_in_message=set(random_list_of_cve(42)),
referred_to_by_pages_linked_from_advisories=set(random_list_of_url(4, 42)),
referred_to_by_nvd=set(random_list_of_url(4, 42)),
annotated_candidates = Commit(
commit_id=random_commit_hash(),
repository=random_url(4),
message=" ".join(random_list_of_strs(100)),
timestamp=randint(0, 100000),
hunks=random_list_of_hunks(1000, 42),
diff=random_list_of_strs(200),
changed_files=random_list_of_path(4, 42),
message_reference_content=random_list_of_strs(42),
jira_refs=random_list_of_jira_refs(42),
ghissue_refs=random_list_of_github_issue_ids(100000, 42),
cve_refs=random_list_of_cve(42),
tags=random_list_of_strs(42),
annotations=random_dict_of_strs(16, 10),
)
candidates.append(commit_with_feature)

candidates.append(annotated_candidates)

advisory = AdvisoryRecord(
vulnerability_id=random_list_of_cve(max_count=1, min_count=1)[0],
@@ -71,5 +63,7 @@ def test_report_generation():
filename = "test_report.html"
if os.path.isfile(filename):
os.remove(filename)
generated_report = report_as_html(candidates, advisory, filename)
generated_report = report_as_html(
candidates, advisory, filename, statistics=sample_statistics()
)
assert os.path.isfile(generated_report)
6 changes: 2 additions & 4 deletions prospector/client/cli/json_report.py
@@ -2,14 +2,12 @@

import log.util
from datamodel.advisory import AdvisoryRecord
from datamodel.commit_features import CommitWithFeatures
from datamodel.commit import Commit

_logger = log.util.init_local_logger()


def report_as_json(
results: "list[CommitWithFeatures]", advisory_record: AdvisoryRecord
):
def report_as_json(results: "list[Commit]", advisory_record: AdvisoryRecord):

data = {
"advisory_record": advisory_record.dict(),
197 changes: 101 additions & 96 deletions prospector/client/cli/main.py
@@ -22,6 +22,7 @@
prospector,
)
from git.git import GIT_CACHE
from simple_hierarchical_storage.execution import ExecutionTimer, execution_statistics

_logger = log.util.init_local_logger()

@@ -166,108 +167,112 @@ def ping_backend(server_url: str, verbose: bool = False) -> bool:


def main(argv): # noqa: C901
args = parseArguments(argv)
configuration = getConfiguration(args.conf)
with ExecutionTimer(execution_statistics.sub_collection(name="initialization")):
args = parseArguments(argv)
configuration = getConfiguration(args.conf)

if args.log_level:
log.config.level = getattr(logging, args.log_level)
if args.log_level:
log.config.level = getattr(logging, args.log_level)

_logger.info(f"global log level is set to {logging.getLevelName(log.config.level)}")
_logger.info(
f"global log level is set to {logging.getLevelName(log.config.level)}"
)

if args.vulnerability_id is None:
_logger.error("No vulnerability id was specified. Cannot proceed.")
return False

if configuration is None:
_logger.error("Invalid configuration, exiting.")
return False

report = configuration["global"].getboolean("report")
if args.report:
report = args.report

if configuration["global"].get("nvd_rest_endpoint"):
nvd_rest_endpoint = configuration["global"].get("nvd_rest_endpoint")

backend = configuration["global"].get("backend") or DEFAULT_BACKEND
if args.backend:
backend = args.backend

if args.ping:
return ping_backend(backend, log.config.level < logging.INFO)

vulnerability_id = args.vulnerability_id
repository_url = args.repository

vuln_descr = args.descr
use_nvd = args.use_nvd
tag_interval = args.tag_interval
version_interval = args.version_interval
time_limit_before = TIME_LIMIT_BEFORE
time_limit_after = TIME_LIMIT_AFTER
max_candidates = args.max_candidates
modified_files = args.modified_files.split(",")
<<<<<<< HEAD
=======

>>>>>>> 30aa6ce (bug: handling of user-supplied special tokens)
code_tokens = (
args.diff_contains.split(",") if args.diff_contains is not None else []
)
if args.vulnerability_id is None:
_logger.error("No vulnerability id was specified. Cannot proceed.")
return False

print(code_tokens)

publication_date = ""
if args.pub_date != "":
publication_date = args.pub_date + "T00:00Z"
# if the date is forced manually, the time interval can
# be restricted
# time_limit_before = int(time_limit_before / 5)
# time_limit_after = int(time_limit_after / 2)

git_cache = GIT_CACHE
if os.environ["GIT_CACHE"]:
git_cache = os.environ["GIT_CACHE"]
if configuration["global"].get("git_cache"):
git_cache = configuration["global"].get("git_cache")

_logger.debug("Using the following configuration:")
_logger.pretty_log(
{section: dict(configuration[section]) for section in configuration.sections()}
)
if configuration is None:
_logger.error("Invalid configuration, exiting.")
return False

_logger.debug("Vulnerability ID: " + vulnerability_id)
_logger.debug("time-limit before: " + str(time_limit_before))
_logger.debug("time-limit after: " + str(time_limit_after))

results, advisory_record = prospector(
vulnerability_id=vulnerability_id,
repository_url=repository_url,
publication_date=publication_date,
vuln_descr=vuln_descr,
tag_interval=tag_interval,
version_interval=version_interval,
modified_files=modified_files,
code_tokens=code_tokens,
time_limit_before=time_limit_before,
time_limit_after=time_limit_after,
use_nvd=use_nvd,
nvd_rest_endpoint=nvd_rest_endpoint,
backend_address=backend,
git_cache=git_cache,
limit_candidates=max_candidates,
active_rules=["ALL"],
)
report = configuration["global"].getboolean("report")
if args.report:
report = args.report

if configuration["global"].get("nvd_rest_endpoint"):
nvd_rest_endpoint = configuration["global"].get("nvd_rest_endpoint")

backend = configuration["global"].get("backend") or DEFAULT_BACKEND
if args.backend:
backend = args.backend

if args.ping:
return ping_backend(backend, log.config.level < logging.INFO)

vulnerability_id = args.vulnerability_id
repository_url = args.repository

vuln_descr = args.descr
use_nvd = args.use_nvd
tag_interval = args.tag_interval
version_interval = args.version_interval
time_limit_before = TIME_LIMIT_BEFORE
time_limit_after = TIME_LIMIT_AFTER
max_candidates = args.max_candidates
modified_files = args.modified_files.split(",")
code_tokens = (
args.diff_contains.split(",") if args.diff_contains is not None else []
)

publication_date = ""
if args.pub_date != "":
publication_date = args.pub_date + "T00:00Z"
# if the date is forced manually, the time interval can
# be restricted
# time_limit_before = int(time_limit_before / 5)
# time_limit_after = int(time_limit_after / 2)

git_cache = GIT_CACHE
if os.environ["GIT_CACHE"]:
git_cache = os.environ["GIT_CACHE"]
if configuration["global"].get("git_cache"):
git_cache = configuration["global"].get("git_cache")

_logger.debug("Using the following configuration:")
_logger.pretty_log(
{
section: dict(configuration[section])
for section in configuration.sections()
}
)

_logger.debug("Vulnerability ID: " + vulnerability_id)
_logger.debug("time-limit before: " + str(time_limit_before))
_logger.debug("time-limit after: " + str(time_limit_after))

with ExecutionTimer(execution_statistics.sub_collection(name="core")):
results, advisory_record = prospector(
vulnerability_id=vulnerability_id,
repository_url=repository_url,
publication_date=publication_date,
vuln_descr=vuln_descr,
tag_interval=tag_interval,
version_interval=version_interval,
modified_files=modified_files,
code_tokens=code_tokens,
time_limit_before=time_limit_before,
time_limit_after=time_limit_after,
use_nvd=use_nvd,
nvd_rest_endpoint=nvd_rest_endpoint,
backend_address=backend,
git_cache=git_cache,
limit_candidates=max_candidates,
active_rules=["ALL"],
)

with ExecutionTimer(execution_statistics.sub_collection(name="reporting")):
if report == "console":
report_on_console(results, advisory_record, log.config.level < logging.INFO)
elif report == "json":
report_as_json(results, advisory_record)
elif report == "html":
report_as_html(results, advisory_record)
else:
_logger.warning("Invalid report type specified, using 'console'")
report_on_console(results, advisory_record, log.config.level < logging.INFO)

if report == "console":
report_on_console(results, advisory_record, log.config.level < logging.INFO)
elif report == "json":
report_as_json(results, advisory_record)
elif report == "html":
report_as_html(results, advisory_record)
else:
_logger.warning("Invalid report type specified, using 'console'")
report_on_console(results, advisory_record, log.config.level < logging.INFO)
_logger.info("\n" + execution_statistics.generate_console_tree())
return True


(Diffs for the remaining 33 changed files are not shown.)