Skip to content

Commit f240baa

Browse files
Merge pull request #107 from abnamro/2225563-metrics-for-number-of-findings
[#2225563] Created multiline charts for total findings count, true po…
2 parents cdec8aa + 5e454e8 commit f240baa

File tree

16 files changed

+1402
-91
lines changed

16 files changed

+1402
-91
lines changed

components/resc-backend/src/resc_backend/constants.py

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
RWS_ROUTE_LAST_SCAN = "/last-scan"
1414
RWS_ROUTE_FINDINGS = "/findings"
1515
RWS_ROUTE_RULES = "/rules"
16+
RWS_ROUTE_METRICS = "/metrics"
1617
RWS_ROUTE_DETAILED_FINDINGS = "/detailed-findings"
1718
RWS_ROUTE_TOTAL_COUNT_BY_RULE = "/total-count-by-rule"
1819
RWS_ROUTE_BY_RULE = "/by-rule"
@@ -22,7 +23,10 @@
2223
RWS_ROUTE_RULE_PACKS = "/rule-packs"
2324
RWS_ROUTE_VCS = "/vcs-instances"
2425

26+
RWS_ROUTE_AUDITED_COUNT_OVER_TIME = "/audited-count-over-time"
27+
RWS_ROUTE_UN_TRIAGED_COUNT_OVER_TIME = "/un-triaged-count-over-time"
2528
RWS_ROUTE_COUNT_BY_TIME = "/count-by-time"
29+
RWS_ROUTE_COUNT_PER_VCS_PROVIDER_BY_WEEK = "/count-per-vcs-provider-by-week"
2630
RWS_ROUTE_SUPPORTED_VCS_PROVIDERS = "/supported-vcs-providers"
2731
RWS_ROUTE_SUPPORTED_STATUSES = "/supported-statuses"
2832
RWS_ROUTE_DISTINCT_PROJECTS = "/distinct-projects"
@@ -43,6 +47,7 @@
4347
RULE_PACKS_TAG = "resc-rule-packs"
4448
HEALTH_TAG = "health"
4549
VCS_TAG = "resc-vcs-instances"
50+
METRICS_TAG = "resc-metrics"
4651

4752
DEFAULT_RECORDS_PER_PAGE_LIMIT = 100
4853
MAX_RECORDS_PER_PAGE_LIMIT = 500

components/resc-backend/src/resc_backend/resc_web_service/api.py

+3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
detailed_findings,
2626
findings,
2727
health,
28+
metrics,
2829
repositories,
2930
rules,
3031
rule_packs,
@@ -95,6 +96,7 @@ def generate_logger_config(log_file_path, debug=True):
9596
{"name": "resc-scans", "description": "Manage scan information"},
9697
{"name": "resc-findings", "description": "Manage findings information"},
9798
{"name": "resc-vcs-instances", "description": "Manage vcs instance information"},
99+
{"name": "resc-metrics", "description": "Retrieve metrics"},
98100
]
99101

100102
# Check if authentication is required for api endpoints
@@ -126,6 +128,7 @@ def generate_logger_config(log_file_path, debug=True):
126128
app.include_router(repositories.router, prefix=RWS_VERSION_PREFIX)
127129
app.include_router(scans.router, prefix=RWS_VERSION_PREFIX)
128130
app.include_router(vcs_instances.router, prefix=RWS_VERSION_PREFIX)
131+
app.include_router(metrics.router, prefix=RWS_VERSION_PREFIX)
129132

130133
# Add exception handlers
131134
add_exception_handlers(app=app)

components/resc-backend/src/resc_backend/resc_web_service/crud/finding.py

+161-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# pylint: disable=R0916,R0912,C0121
22
# Standard Library
33
import logging
4-
from datetime import datetime
4+
from datetime import datetime, timedelta
55
from typing import List
66

77
# Third Party
8-
from sqlalchemy import and_, extract, func, or_
8+
from sqlalchemy import and_, extract, func, or_, union
9+
from sqlalchemy.engine import Row
910
from sqlalchemy.orm import Session
1011

1112
# First Party
@@ -450,6 +451,7 @@ def get_rule_findings_count_by_status(db_connection: Session, rule_pack_versions
450451
"clarification_required": 0,
451452
"total_findings_count": 0
452453
}
454+
453455
for status_count in status_counts:
454456
rule_count_dict[status_count[0]]["total_findings_count"] += status_count[2]
455457
if status_count[1] == FindingStatus.NOT_ANALYZED or status_count[1] is None:
@@ -640,3 +642,160 @@ def delete_findings_by_vcs_instance_id(db_connection: Session, vcs_instance_id:
640642
model.vcs_instance.DBVcsInstance.id_ == vcs_instance_id) \
641643
.delete(synchronize_session=False)
642644
db_connection.commit()
645+
646+
647+
def get_finding_audit_status_count_over_time(db_connection: Session, status: FindingStatus,
                                             weeks: int = 13) -> list[Row]:
    """
        Retrieve count of findings with the given audit status over time for given weeks
    :param db_connection:
        Session of the database connection
    :param status:
        mandatory, status for which to get the audit counts over time
    :param weeks:
        optional, filter on last n weeks, default 13
    :return: status_count_over_time
        list of rows (year, week, provider_type, finding_count), one per week and vcs provider, counting the
        findings whose latest audit up to that week has the given status. Empty list when weeks is 0 or less
    """
    def _up_to_week(timestamp_column, reference):
        # Matches timestamps from an earlier year than the reference, or from the same year and not a later week.
        # Comparing on "same year" only would drop everything that was recorded in previous years.
        return or_(extract('year', timestamp_column) < extract('year', reference),
                   and_(extract('year', timestamp_column) == extract('year', reference),
                        extract('week', timestamp_column) <= extract('week', reference)))

    # Single reference instant, so all week offsets are relative to the same moment
    now = datetime.utcnow()
    all_tables = []
    for week in range(weeks):
        last_nth_week_date_time = now - timedelta(weeks=week)

        # Latest audit per finding, only looking at audits made up to the week that is being reported
        max_audit_subquery = db_connection.query(func.max(model.DBaudit.id_).label("audit_id")) \
            .filter(_up_to_week(model.DBaudit.timestamp, last_nth_week_date_time)) \
            .group_by(model.DBaudit.finding_id).subquery()

        query = db_connection.query(extract('year', last_nth_week_date_time).label("year"),
                                    extract('week', last_nth_week_date_time).label("week"),
                                    model.DBVcsInstance.provider_type.label("provider_type"),
                                    func.count(model.DBaudit.id_).label("finding_count")
                                    )
        query = query.join(max_audit_subquery, max_audit_subquery.c.audit_id == model.DBaudit.id_)
        query = query.join(model.DBfinding, model.DBfinding.id_ == model.DBaudit.finding_id)
        query = query.join(model.DBbranch, model.DBbranch.id_ == model.DBfinding.branch_id)
        query = query.join(model.DBrepository, model.DBrepository.id_ == model.DBbranch.repository_id)
        query = query.join(model.DBVcsInstance, model.DBVcsInstance.id_ == model.DBrepository.vcs_instance)
        query = query.filter(model.DBaudit.status == status)
        query = query.group_by(model.DBVcsInstance.provider_type)

        all_tables.append(query)

    # Nothing to combine when no weeks were requested
    if not all_tables:
        return []

    # union
    unioned_query = union(*all_tables)
    status_count_over_time = db_connection.execute(unioned_query).all()
    return status_count_over_time
685+
686+
687+
def get_finding_count_by_vcs_provider_over_time(db_connection: Session, weeks: int = 13) -> list[Row]:
    """
        Retrieve count findings over time for given weeks
    :param db_connection:
        Session of the database connection
    :param weeks:
        optional, filter on last n weeks, default 13
    :return: count_over_time
        list of rows (year, week, provider_type, finding_count) containing finding count over time per week,
        one row per week and vcs provider. Empty list when weeks is 0 or less
    """
    def _up_to_week(timestamp_column, reference):
        # Matches timestamps from an earlier year than the reference, or from the same year and not a later week.
        # Comparing on "same year" only would drop everything that was recorded in previous years.
        return or_(extract('year', timestamp_column) < extract('year', reference),
                   and_(extract('year', timestamp_column) == extract('year', reference),
                        extract('week', timestamp_column) <= extract('week', reference)))

    # Single reference instant, so all week offsets are relative to the same moment
    now = datetime.utcnow()
    all_tables = []
    for week in range(weeks):
        last_nth_week_date_time = now - timedelta(weeks=week)

        # Most recent base scan per branch, made up to the week that is being reported
        max_base_scan = db_connection.query(func.max(model.DBscan.id_).label("scan_id"),
                                            model.DBscan.branch_id) \
            .filter(_up_to_week(model.DBscan.timestamp, last_nth_week_date_time)) \
            .filter(model.DBscan.scan_type == ScanType.BASE) \
            .group_by(model.DBscan.branch_id).subquery()

        query = db_connection.query(extract('year', last_nth_week_date_time).label("year"),
                                    extract('week', last_nth_week_date_time).label("week"),
                                    model.DBVcsInstance.provider_type.label("provider_type"),
                                    func.count(model.DBfinding.id_).label("finding_count")
                                    )
        query = query.join(model.DBscanFinding, model.DBfinding.id_ == model.DBscanFinding.finding_id)
        query = query.join(model.DBscan, model.DBscan.id_ == model.DBscanFinding.scan_id)
        # Keep the latest base scan itself plus the incremental scans that came after it, up to the reported week
        query = query.join(max_base_scan,
                           and_(max_base_scan.c.branch_id == model.DBscan.branch_id,
                                or_(model.DBscan.id_ == max_base_scan.c.scan_id,
                                    and_(model.DBscan.id_ > max_base_scan.c.scan_id,
                                         model.DBscan.scan_type == ScanType.INCREMENTAL,
                                         _up_to_week(model.DBscan.timestamp, last_nth_week_date_time))
                                    )
                                )
                           )
        query = query.join(model.DBbranch, model.DBbranch.id_ == model.DBscan.branch_id)
        query = query.join(model.DBrepository, model.DBrepository.id_ == model.DBbranch.repository_id)
        query = query.join(model.DBVcsInstance, model.DBVcsInstance.id_ == model.DBrepository.vcs_instance)
        query = query.group_by(model.DBVcsInstance.provider_type)

        all_tables.append(query)

    # Nothing to combine when no weeks were requested
    if not all_tables:
        return []

    # union
    unioned_query = union(*all_tables)
    count_over_time = db_connection.execute(unioned_query).all()
    return count_over_time
736+
737+
738+
def get_un_triaged_finding_count_by_vcs_provider_over_time(db_connection: Session, weeks: int = 13) -> list[Row]:
    """
        Retrieve count of un triaged findings over time for given weeks
    :param db_connection:
        Session of the database connection
    :param weeks:
        optional, filter on last n weeks, default 13
    :return: count_over_time
        list of rows (year, week, provider_type, finding_count) containing un triaged findings count over time
        per week, one row per week and vcs provider. Empty list when weeks is 0 or less
    """
    def _up_to_week(timestamp_column, reference):
        # Matches timestamps from an earlier year than the reference, or from the same year and not a later week.
        # Comparing on "same year" only would drop everything that was recorded in previous years.
        return or_(extract('year', timestamp_column) < extract('year', reference),
                   and_(extract('year', timestamp_column) == extract('year', reference),
                        extract('week', timestamp_column) <= extract('week', reference)))

    # Single reference instant, so all week offsets are relative to the same moment
    now = datetime.utcnow()
    all_tables = []
    for week in range(weeks):
        last_nth_week_date_time = now - timedelta(weeks=week)

        # Most recent base scan per branch, made up to the week that is being reported
        max_base_scan = db_connection.query(func.max(model.DBscan.id_).label("scan_id"),
                                            model.DBscan.branch_id) \
            .filter(_up_to_week(model.DBscan.timestamp, last_nth_week_date_time)) \
            .filter(model.DBscan.scan_type == ScanType.BASE) \
            .group_by(model.DBscan.branch_id).subquery()

        # Latest audit per finding, only looking at audits made up to the week that is being reported
        max_audit_subquery = db_connection.query(model.DBaudit.finding_id,
                                                 func.max(model.DBaudit.id_).label("audit_id")) \
            .filter(_up_to_week(model.DBaudit.timestamp, last_nth_week_date_time)) \
            .group_by(model.DBaudit.finding_id).subquery()

        query = db_connection.query(extract('year', last_nth_week_date_time).label("year"),
                                    extract('week', last_nth_week_date_time).label("week"),
                                    model.DBVcsInstance.provider_type.label("provider_type"),
                                    func.count(model.DBfinding.id_).label("finding_count")
                                    )
        query = query.join(model.DBscanFinding, model.DBfinding.id_ == model.DBscanFinding.finding_id)
        query = query.join(model.DBscan, model.DBscan.id_ == model.DBscanFinding.scan_id)
        # Keep the latest base scan itself plus the incremental scans that came after it, up to the reported week
        query = query.join(max_base_scan,
                           and_(max_base_scan.c.branch_id == model.DBscan.branch_id,
                                or_(model.DBscan.id_ == max_base_scan.c.scan_id,
                                    and_(model.DBscan.id_ > max_base_scan.c.scan_id,
                                         model.DBscan.scan_type == ScanType.INCREMENTAL,
                                         _up_to_week(model.DBscan.timestamp, last_nth_week_date_time))
                                    )
                                )
                           )
        query = query.join(model.DBbranch, model.DBbranch.id_ == model.DBscan.branch_id)
        query = query.join(model.DBrepository, model.DBrepository.id_ == model.DBbranch.repository_id)
        query = query.join(model.DBVcsInstance, model.DBVcsInstance.id_ == model.DBrepository.vcs_instance)

        # Outer joins: findings that were never audited must stay in the result
        query = query.join(max_audit_subquery, max_audit_subquery.c.finding_id == model.finding.DBfinding.id_,
                           isouter=True)
        query = query.join(model.DBaudit, and_(model.audit.DBaudit.finding_id == model.finding.DBfinding.id_,
                                               model.audit.DBaudit.id_ == max_audit_subquery.c.audit_id),
                           isouter=True)
        # Un triaged: no audit at all, or the latest audit still marks the finding as not analyzed
        query = query.filter(
            or_(model.DBaudit.id_ == None, model.DBaudit.status == FindingStatus.NOT_ANALYZED))  # noqa: E711

        query = query.group_by(model.DBVcsInstance.provider_type)

        all_tables.append(query)

    # Nothing to combine when no weeks were requested
    if not all_tables:
        return []

    # union
    unioned_query = union(*all_tables)
    count_over_time = db_connection.execute(unioned_query).all()
    return count_over_time
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Standard Library
2+
import logging
3+
from datetime import datetime, timedelta
4+
from typing import Optional
5+
6+
# Third Party
7+
from fastapi import APIRouter, Depends, Query, status
8+
9+
# First Party
10+
from resc_backend.constants import (
11+
ERROR_MESSAGE_500,
12+
ERROR_MESSAGE_503,
13+
METRICS_TAG,
14+
RWS_ROUTE_AUDITED_COUNT_OVER_TIME,
15+
RWS_ROUTE_COUNT_PER_VCS_PROVIDER_BY_WEEK,
16+
RWS_ROUTE_METRICS,
17+
RWS_ROUTE_UN_TRIAGED_COUNT_OVER_TIME
18+
)
19+
from resc_backend.db.connection import Session
20+
from resc_backend.resc_web_service.crud import finding as finding_crud
21+
from resc_backend.resc_web_service.dependencies import get_db_connection
22+
from resc_backend.resc_web_service.schema.finding_count_over_time import FindingCountOverTime
23+
from resc_backend.resc_web_service.schema.finding_status import FindingStatus
24+
from resc_backend.resc_web_service.schema.vcs_provider import VCSProviders
25+
26+
router = APIRouter(prefix=f"{RWS_ROUTE_METRICS}", tags=[METRICS_TAG])
27+
logger = logging.getLogger(__name__)
28+
29+
30+
@router.get(f"{RWS_ROUTE_AUDITED_COUNT_OVER_TIME}",
            response_model=list[FindingCountOverTime],
            summary="Get count of audit status over time for given weeks per vcs provider",
            status_code=status.HTTP_200_OK,
            responses={
                200: {"description": "Retrieve count of audit status over time for given weeks per vcs provider"},
                500: {"description": ERROR_MESSAGE_500},
                503: {"description": ERROR_MESSAGE_503}
            })
def get_finding_audit_count_over_time(db_connection: Session = Depends(get_db_connection),
                                      weeks: Optional[int] = Query(default=13, ge=1),
                                      audit_status: Optional[FindingStatus] = Query(default=FindingStatus.TRUE_POSITIVE)
                                      ) -> list[FindingCountOverTime]:
    """
        Retrieve count of audited findings over time for given weeks per vcs provider
    - **db_connection**: Session of the database connection
    - **weeks**: Nr of weeks for which to retrieve the audit status count
    - **audit_status**: audit status for which to retrieve the counts, defaults to True positive
    - **return**: [FindingCountOverTime]
        The output will contain a list of FindingCountOverTime type objects
    """
    audit_counts = finding_crud.get_finding_audit_status_count_over_time(db_connection=db_connection,
                                                                         status=audit_status,
                                                                         weeks=weeks)
    # Reshape the database rows into one entry per week
    return convert_rows_to_finding_count_over_time(count_over_time=audit_counts, weeks=weeks)
56+
57+
58+
@router.get(f"{RWS_ROUTE_COUNT_PER_VCS_PROVIDER_BY_WEEK}",
            response_model=list[FindingCountOverTime],
            summary="Get count of findings over time for given weeks per vcs provider",
            status_code=status.HTTP_200_OK,
            responses={
                200: {"description": "Retrieve count of findings over time for given weeks per vcs provider"},
                500: {"description": ERROR_MESSAGE_500},
                503: {"description": ERROR_MESSAGE_503}
            })
def get_finding_total_count_over_time(db_connection: Session = Depends(get_db_connection),
                                      weeks: Optional[int] = Query(default=13, ge=1)) -> list[FindingCountOverTime]:
    """
        Retrieve count of findings over time for given weeks per vcs provider
    - **db_connection**: Session of the database connection
    - **weeks**: Nr of weeks for which to retrieve the findings count
    - **return**: [FindingCountOverTime]
        The output will contain a list of FindingCountOverTime type objects
    """
    finding_counts = finding_crud.get_finding_count_by_vcs_provider_over_time(db_connection=db_connection,
                                                                              weeks=weeks)
    # Reshape the database rows into one entry per week
    return convert_rows_to_finding_count_over_time(count_over_time=finding_counts, weeks=weeks)
80+
81+
82+
@router.get(f"{RWS_ROUTE_UN_TRIAGED_COUNT_OVER_TIME}",
            response_model=list[FindingCountOverTime],
            summary="Get count of UnTriaged findings over time for given weeks per vcs provider",
            status_code=status.HTTP_200_OK,
            responses={
                200: {"description": "Retrieve count of UnTriaged findings over time for given weeks per vcs provider"},
                500: {"description": ERROR_MESSAGE_500},
                503: {"description": ERROR_MESSAGE_503}
            })
def get_finding_un_triaged_count_over_time(db_connection: Session = Depends(get_db_connection),
                                           weeks: Optional[int] = Query(default=13, ge=1)) \
        -> list[FindingCountOverTime]:
    """
        Retrieve count of UnTriaged findings over time for given weeks per vcs provider
    - **db_connection**: Session of the database connection
    - **weeks**: Nr of weeks for which to retrieve the UnTriaged findings count
    - **return**: [FindingCountOverTime]
        The output will contain a list of FindingCountOverTime type objects
    """
    un_triaged_counts = finding_crud.get_un_triaged_finding_count_by_vcs_provider_over_time(
        db_connection=db_connection, weeks=weeks)
    # Reshape the database rows into one entry per week
    return convert_rows_to_finding_count_over_time(count_over_time=un_triaged_counts, weeks=weeks)
106+
107+
108+
def convert_rows_to_finding_count_over_time(count_over_time: list, weeks: int) -> list[FindingCountOverTime]:
    """
        Convert the rows from the database to the format of list[FindingCountOverTime]
    :param count_over_time:
        rows from the database, each providing "year", "week", "provider_type" and "finding_count"
    :param weeks:
        number of weeks that are in the data
    :return: output
        list[FindingCountOverTime], sorted on time period, with an entry for each of the last n weeks even when
        the database returned no rows for that week
    """
    # Define the vcs provider types and the counters that are kept per week
    vcs_provider_types = list(VCSProviders)
    count_keys = vcs_provider_types + ["total"]

    # create defaults with 0 value, all relative to the same reference moment
    now = datetime.utcnow()
    week_groups = {}
    for weeks_back in range(weeks):
        iso_date = (now - timedelta(weeks=weeks_back)).isocalendar()
        week_groups[f"{iso_date.year} W{iso_date.week:02d}"] = dict.fromkeys(count_keys, 0)

    # loop over the counts from the database
    for data in count_over_time:
        # The database may hand back the extracted year/week as a non-int numeric type, which the
        # "02d" format does not accept, so normalise to int first
        week_label = f"{int(data['year'])} W{int(data['week']):02d}"
        finding_count = data["finding_count"]

        # The labels above use the ISO calendar year while the rows carry whatever the database extracted,
        # so a row can fall outside the pre-seeded weeks; keep its counts instead of failing on a missing key
        week_counts = week_groups.setdefault(week_label, dict.fromkeys(count_keys, 0))
        week_counts[data["provider_type"]] = week_counts.get(data["provider_type"], 0) + finding_count
        week_counts["total"] += finding_count

    # Convert to the output format
    output = []
    for week_label in sorted(week_groups):
        week_counts = week_groups[week_label]
        week_data = FindingCountOverTime(time_period=week_label, total=week_counts["total"])
        for vcs_provider_type in vcs_provider_types:
            setattr(week_data.vcs_provider_finding_count, vcs_provider_type, week_counts[vcs_provider_type])

        output.append(week_data)
    return output
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# pylint: disable=no-name-in-module
2+
# Third Party
3+
from pydantic import BaseModel
4+
5+
6+
class VcsProviderFindingCount(BaseModel):
    # Finding counters, one integer field per vcs provider name.
    # Every counter defaults to 0, so the model can be created without arguments.
    AZURE_DEVOPS: int = 0
    BITBUCKET: int = 0
    GITHUB_PUBLIC: int = 0
10+
11+
12+
class FindingCountOverTime(BaseModel):
    # Finding counts for a single time period.
    # time_period: mandatory label of the period the counts belong to
    time_period: str
    # vcs_provider_finding_count: counts split per vcs provider, all 0 unless provided
    vcs_provider_finding_count: VcsProviderFindingCount = VcsProviderFindingCount()
    # total: overall count for the period, 0 unless provided
    total: int = 0

0 commit comments

Comments
 (0)