Skip to content

Commit d2edbdc

Browse files
[#2241513] Rule categorization
1 parent f240baa commit d2edbdc

36 files changed

+1052
-194
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""rule tags
2+
3+
Revision ID: 8dd0f349b5ad
4+
Revises: ar7fh3ac2071
5+
Create Date: 2023-05-02 10:10:20.423021
6+
7+
"""
8+
import logging
9+
10+
from alembic import op
11+
import sqlalchemy as sa
12+
13+
# revision identifiers, used by Alembic.
14+
from sqlalchemy.engine import Inspector
15+
16+
revision = '8dd0f349b5ad'
17+
down_revision = 'ar7fh3ac2071'
18+
branch_labels = None
19+
depends_on = None
20+
21+
# Logger
22+
logger = logging.getLogger()
23+
24+
25+
def upgrade():
    """Move rule tags from the ``rules.tags`` column into ``tag`` and ``rule_tag`` tables.

    Steps, in order:

    1. Drop any pre-existing ``rule_tag`` / ``tag`` tables.
    2. Create ``tag`` (one row per distinct tag name) and ``rule_tag``
       (the rule <-> tag association).
    3. Populate both tables by splitting the comma-separated ``rules.tags``
       value of every rule.
    4. Drop the now redundant ``rules.tags`` column.

    The data statements use ``CROSS APPLY STRING_SPLIT``, which is T-SQL syntax.
    """
    inspector = sa.inspect(op.get_bind())

    # rule_tag carries a foreign key to tag, so it has to be dropped first;
    # dropping tag while rule_tag still references it would be rejected.
    drop_if_exist(inspector, "rule_tag")
    drop_if_exist(inspector, "tag")

    # Both tables are guaranteed to be absent at this point, so they are
    # created unconditionally instead of asking the inspector a second time.
    logger.info("Creating table tag")
    op.create_table(
        "tag",
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=200), nullable=False),
        sa.PrimaryKeyConstraint('id'),
        # Keep the schema in line with the DBtag model, which declares this constraint.
        sa.UniqueConstraint('name', name='unique_tag'),
    )

    logger.info("Creating table rule_tag")
    op.create_table(
        'rule_tag',
        sa.Column('rule_id', sa.Integer(), nullable=False),
        sa.Column('tag_id', sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(['rule_id'], ['rules.id'], ),
        sa.ForeignKeyConstraint(['tag_id'], ['tag.id'], ),
        sa.PrimaryKeyConstraint('rule_id', 'tag_id'),
    )

    # Insert one tag row per distinct value found in rules.tags
    logger.info("Inserting data in to tag table")
    op.execute("INSERT INTO tag (name) "
               "select distinct cs.Value as name from rules "
               "cross apply STRING_SPLIT(tags, ',') cs")

    # Link every rule to its tags. DISTINCT is required: a rule whose tags
    # string repeats a tag would otherwise yield duplicate (rule_id, tag_id)
    # rows and violate the composite primary key.
    logger.info("Inserting data in to rule_tag table")
    op.execute("INSERT INTO rule_tag (rule_id, tag_id) "
               "select distinct rules.id, tag.id from rules "
               "cross apply STRING_SPLIT(tags, ',') cs join tag on tag.name = cs.Value")

    # Drop tags column from rules table
    logger.info("Drop tags column from rules table")
    op.drop_column('rules', 'tags')
62+
63+
64+
def downgrade():
    """Restore the ``rules.tags`` column and remove the ``rule_tag`` / ``tag`` tables.

    The column is re-added as a nullable string, filled per rule with the
    comma-joined names of the tags linked to it through ``rule_tag``, and
    then both tag tables are dropped.
    """
    # add tags column to rules
    op.add_column('rules', sa.Column('tags', sa.String(length=2000), nullable=True))

    # Rebuild the comma-separated tags value of every rule from rule_tag/tag.
    # The correlated subquery yields no row for a rule without tags, which
    # leaves that rule's tags column NULL.
    logger.info("Update tags in rules table")
    op.execute("update rules set tags = ("
               "SELECT STRING_AGG(tag.name, ',') as tags "
               "FROM rule_tag "
               "JOIN tag ON tag.id = rule_tag.tag_id "
               "where rule_id = rules.id "
               "GROUP BY rule_id)")

    # rule_tag references tag through a foreign key, so the association table
    # must be dropped before the table it points to.
    op.drop_table('rule_tag')
    op.drop_table('tag')
80+
81+
82+
def drop_if_exist(inspector: Inspector, table_name: str):
    """Drop ``table_name`` when the inspector reports it exists; do nothing otherwise.

    :param inspector:
        Inspector used to check whether the table is present
    :param table_name:
        Name of the table to drop
    """
    if not inspector.has_table(table_name):
        return

    logger.info(f"Dropping table {table_name}")
    op.drop_table(table_name)

components/resc-backend/setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[metadata]
22
name = resc_backend
33
description = Repository Scanner - Backend
4-
version = 1.2.0
4+
version = 1.3.0
55
author = ABN AMRO
66
author_email = resc@nl.abnamro.com
77
url = https://github.com/ABNAMRO/repository-scanner

components/resc-backend/src/resc_backend/db/model/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from resc_backend.db.model.rule import DBrule
2020
from resc_backend.db.model.rule_allow_list import DBruleAllowList
2121
from resc_backend.db.model.rule_pack import DBrulePack
22+
from resc_backend.db.model.rule_tag import DBruleTag
2223
from resc_backend.db.model.scan import DBscan
2324
from resc_backend.db.model.scan_finding import DBscanFinding
25+
from resc_backend.db.model.tag import DBtag
2426
from resc_backend.db.model.vcs_instance import DBVcsInstance

components/resc-backend/src/resc_backend/db/model/rule.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ class DBrule(Base):
1515
allow_list = Column(Integer, ForeignKey(DBruleAllowList.id_), nullable=True)
1616
rule_name = Column(String(400), nullable=False)
1717
description = Column(String(2000), nullable=True)
18-
tags = Column(String(2000), nullable=True)
1918
entropy = Column(Float, nullable=True)
2019
secret_group = Column(Integer, nullable=True)
2120
regex = Column(Text, nullable=True)
@@ -24,7 +23,7 @@ class DBrule(Base):
2423
__table_args__ = (UniqueConstraint("rule_name", "rule_pack", name="unique_rule_name_per_rule_pack_version"),)
2524

2625
def __init__(self, rule_pack: str, rule_name: str, description: str, allow_list: int = None,
27-
entropy: float = None, secret_group: str = None, regex: str = None, path: str = None, tags: str = None,
26+
entropy: float = None, secret_group: str = None, regex: str = None, path: str = None,
2827
keywords: str = None):
2928
self.rule_pack = rule_pack
3029
self.allow_list = allow_list
@@ -34,18 +33,16 @@ def __init__(self, rule_pack: str, rule_name: str, description: str, allow_list:
3433
self.secret_group = secret_group
3534
self.regex = regex
3635
self.path = path
37-
self.tags = tags
3836
self.keywords = keywords
3937

4038
@staticmethod
41-
def create_from_metadata(rule_pack: str, rule_name: str, description: str, tags: str, entropy: float,
39+
def create_from_metadata(rule_pack: str, rule_name: str, description: str, entropy: float,
4240
secret_group: str, regex: str, path: str, keywords: str,
4341
allow_list: int):
4442
db_rule = DBrule(
4543
rule_pack=rule_pack,
4644
rule_name=rule_name,
4745
description=description,
48-
tags=tags,
4946
entropy=entropy,
5047
secret_group=secret_group,
5148
regex=regex,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Third Party
2+
from sqlalchemy import Column, ForeignKey, Integer
3+
4+
# First Party
5+
from resc_backend.db.model import Base
6+
7+
8+
class DBruleTag(Base):
    """Association row of the ``rule_tag`` table, linking one rule to one tag."""

    __tablename__ = "rule_tag"

    # Both columns together form the composite primary key.
    rule_id = Column(
        Integer,
        ForeignKey("rules.id"),
        primary_key=True,
    )
    tag_id = Column(
        Integer,
        ForeignKey("tag.id"),
        primary_key=True,
    )

    def __init__(self, rule_id: int, tag_id: int):
        """Create a link between the rule ``rule_id`` and the tag ``tag_id``."""
        self.rule_id, self.tag_id = rule_id, tag_id
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# pylint: disable=R0902
2+
# Third Party
3+
from sqlalchemy import Column, Integer, String, UniqueConstraint
4+
5+
# First Party
6+
from resc_backend.db.model import Base
7+
8+
9+
class DBtag(Base):
    """Row of the ``tag`` table: a tag name that is unique across the table."""

    __tablename__ = "tag"
    __table_args__ = (
        UniqueConstraint("name", name="unique_tag"),
    )

    # Exposed as ``id_`` in Python, stored in the database column ``id``.
    id_ = Column("id", Integer, primary_key=True)
    name = Column(String(200), nullable=False)

    def __init__(self, name: str):
        """Create a tag called ``name``."""
        self.name = name

components/resc-backend/src/resc_backend/resc_web_service/crud/detailed_finding.py

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# pylint: disable=R0912,C0121
1+
# pylint: disable=R0912,C0121,R0915
22
# Standard Library
33
from typing import List
44

@@ -46,6 +46,18 @@ def get_detailed_findings(db_connection: Session, findings_filter: FindingsFilte
4646
func.max(model.DBaudit.id_).label("audit_id")) \
4747
.group_by(model.DBaudit.finding_id).subquery()
4848

49+
rule_tag_subquery = db_connection.query(model.DBruleTag.rule_id) \
50+
.join(model.DBtag, model.DBruleTag.tag_id == model.DBtag.id_)
51+
if findings_filter.rule_tags:
52+
rule_tag_subquery = rule_tag_subquery.filter(model.DBtag.name.in_(findings_filter.rule_tags))
53+
if findings_filter.rule_pack_versions or findings_filter.rule_names:
54+
rule_tag_subquery = rule_tag_subquery.join(model.DBrule, model.DBrule.id_ == model.DBruleTag.rule_id)
55+
if findings_filter.rule_pack_versions:
56+
rule_tag_subquery = rule_tag_subquery.filter(model.DBrule.rule_pack.in_(findings_filter.rule_pack_versions))
57+
if findings_filter.rule_names:
58+
rule_tag_subquery = rule_tag_subquery.filter(model.DBrule.rule_name.in_(findings_filter.rule_names))
59+
rule_tag_subquery = rule_tag_subquery.group_by(model.DBruleTag.rule_id).subquery()
60+
4961
limit_val = MAX_RECORDS_PER_PAGE_LIMIT if limit > MAX_RECORDS_PER_PAGE_LIMIT else limit
5062

5163
query = db_connection.query(
@@ -96,8 +108,8 @@ def get_detailed_findings(db_connection: Session, findings_filter: FindingsFilte
96108
if findings_filter.rule_tags:
97109
query = query.join(model.DBrule, and_(model.DBrule.rule_name == model.DBfinding.rule_name,
98110
model.DBrule.rule_pack == model.DBscan.rule_pack))
99-
for tag in findings_filter.rule_tags:
100-
query = query.filter(model.DBrule.tags.like(f"%{tag}%"))
111+
query = query.join(rule_tag_subquery, model.DBrule.id_ == rule_tag_subquery.c.rule_id)
112+
101113
if findings_filter.rule_pack_versions:
102114
query = query.filter(model.DBscan.rule_pack.in_(findings_filter.rule_pack_versions))
103115
if findings_filter.start_date_time:
@@ -158,6 +170,18 @@ def get_detailed_findings_count(db_connection: Session, findings_filter: Finding
158170
model.DBscan.rule_pack.in_(findings_filter.rule_pack_versions))
159171
max_base_scan_subquery = max_base_scan_subquery.group_by(model.DBscan.branch_id).subquery()
160172

173+
rule_tag_subquery = db_connection.query(model.DBruleTag.rule_id) \
174+
.join(model.DBtag, model.DBruleTag.tag_id == model.DBtag.id_)
175+
if findings_filter.rule_tags:
176+
rule_tag_subquery = rule_tag_subquery.filter(model.DBtag.name.in_(findings_filter.rule_tags))
177+
if findings_filter.rule_pack_versions or findings_filter.rule_names:
178+
rule_tag_subquery = rule_tag_subquery.join(model.DBrule, model.DBrule.id_ == model.DBruleTag.rule_id)
179+
if findings_filter.rule_pack_versions:
180+
rule_tag_subquery = rule_tag_subquery.filter(model.DBrule.rule_pack.in_(findings_filter.rule_pack_versions))
181+
if findings_filter.rule_names:
182+
rule_tag_subquery = rule_tag_subquery.filter(model.DBrule.rule_name.in_(findings_filter.rule_names))
183+
rule_tag_subquery = rule_tag_subquery.group_by(model.DBruleTag.rule_id).subquery()
184+
161185
query = db_connection.query(func.count(model.DBfinding.id_))
162186

163187
query = query.join(model.DBscanFinding, model.DBfinding.id_ == model.DBscanFinding.finding_id)
@@ -184,8 +208,7 @@ def get_detailed_findings_count(db_connection: Session, findings_filter: Finding
184208
if findings_filter.rule_tags:
185209
query = query.join(model.DBrule, and_(model.DBrule.rule_name == model.DBfinding.rule_name,
186210
model.DBrule.rule_pack == model.DBscan.rule_pack))
187-
for tag in findings_filter.rule_tags:
188-
query = query.filter(model.DBrule.tags.like(f"%{tag}%"))
211+
query = query.join(rule_tag_subquery, model.DBrule.id_ == rule_tag_subquery.c.rule_id)
189212

190213
if findings_filter.rule_pack_versions:
191214
query = query.filter(model.DBscan.rule_pack.in_(findings_filter.rule_pack_versions))

components/resc-backend/src/resc_backend/resc_web_service/crud/finding.py

+18-15
Original file line numberDiff line numberDiff line change
@@ -189,20 +189,6 @@ def get_total_findings_count(db_connection: Session, findings_filter: FindingsFi
189189
model.repository.DBrepository.id_ == model.branch.DBbranch.repository_id) \
190190
.join(model.DBVcsInstance,
191191
model.vcs_instance.DBVcsInstance.id_ == model.repository.DBrepository.vcs_instance)
192-
elif findings_filter.rule_tags:
193-
max_scan_subquery = db_connection.query(model.DBscanFinding.finding_id,
194-
func.max(model.DBscanFinding.scan_id).label("scan_id"))
195-
max_scan_subquery = max_scan_subquery.group_by(model.DBscanFinding.finding_id).subquery()
196-
total_count_query = total_count_query.join(max_scan_subquery,
197-
model.finding.DBfinding.id_ == max_scan_subquery.c.finding_id) \
198-
.join(model.DBscan, model.scan.DBscan.id_ == max_scan_subquery.c.scan_id)
199-
200-
if findings_filter.rule_tags:
201-
total_count_query = total_count_query.join(model.DBrule,
202-
and_(model.DBrule.rule_name == model.DBfinding.rule_name,
203-
model.DBrule.rule_pack == model.DBscan.rule_pack))
204-
for tag in findings_filter.rule_tags:
205-
total_count_query = total_count_query.filter(model.DBrule.tags.like(f"%{tag}%"))
206192

207193
if findings_filter.start_date_time:
208194
total_count_query = total_count_query.filter(
@@ -401,13 +387,16 @@ def get_findings_count_by_status(db_connection: Session, scan_ids: List[int] = N
401387
return findings_count_by_status
402388

403389

404-
def get_rule_findings_count_by_status(db_connection: Session, rule_pack_versions: [str] = None):
390+
def get_rule_findings_count_by_status(db_connection: Session, rule_pack_versions: [str] = None,
391+
rule_tags: [str] = None):
405392
"""
406393
Retrieve count of findings based on rulename and status
407394
:param db_connection:
408395
Session of the database connection
409396
:param rule_pack_versions:
410397
optional, filter on rule pack version
398+
:param rule_tags:
399+
optional, filter on rule tag
411400
:return: findings_count
412401
per rulename and status the count of findings
413402
"""
@@ -430,6 +419,20 @@ def get_rule_findings_count_by_status(db_connection: Session, rule_pack_versions
430419
query = query.join(max_base_scan_subquery, model.DBfinding.branch_id == max_base_scan_subquery.c.branch_id)
431420
query = query.join(model.DBscan, and_(model.DBscanFinding.scan_id == model.DBscan.id_,
432421
model.DBscan.id_ >= max_base_scan_subquery.c.latest_base_scan_id))
422+
if rule_tags:
423+
rule_tag_subquery = db_connection.query(model.DBruleTag.rule_id) \
424+
.join(model.DBtag, model.DBruleTag.tag_id == model.DBtag.id_)
425+
if rule_pack_versions:
426+
rule_tag_subquery = rule_tag_subquery.join(model.DBrule, model.DBrule.id_ == model.DBruleTag.rule_id)
427+
rule_tag_subquery = rule_tag_subquery.filter(model.DBrule.rule_pack.in_(rule_pack_versions))
428+
429+
rule_tag_subquery = rule_tag_subquery.filter(model.DBtag.name.in_(rule_tags))
430+
rule_tag_subquery = rule_tag_subquery.group_by(model.DBruleTag.rule_id).subquery()
431+
432+
query = query.join(model.DBrule, and_(model.DBrule.rule_name == model.DBfinding.rule_name,
433+
model.DBrule.rule_pack == model.DBscan.rule_pack))
434+
query = query.join(rule_tag_subquery, model.DBrule.id_ == rule_tag_subquery.c.rule_id)
435+
433436
if rule_pack_versions:
434437
query = query.filter(model.DBscan.rule_pack.in_(rule_pack_versions))
435438

components/resc-backend/src/resc_backend/resc_web_service/crud/rule.py

-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ def create_rule(db_connection: Session, rule: RuleCreate):
6262
db_rule = model.rule.DBrule(
6363
rule_name=rule.rule_name,
6464
description=rule.description,
65-
tags=rule.tags,
6665
entropy=rule.entropy,
6766
secret_group=rule.secret_group,
6867
regex=rule.regex,
@@ -92,7 +91,6 @@ def get_rules_by_rule_pack_version(db_connection: Session, rule_pack_version: st
9291
model.DBrule.id_,
9392
model.DBrule.rule_pack,
9493
model.DBrule.rule_name,
95-
model.DBrule.tags,
9694
model.DBrule.entropy,
9795
model.DBrule.secret_group,
9896
model.DBrule.regex,

0 commit comments

Comments
 (0)