Update 12 parsers to have better metadata (#11900)

* Added docstring for the burp parser * Moved docstring to top of file and made changes to pass liner * Fixed format for ruff linter * Final changes for ruff linter: * Added aqua v1 and v2 parser * Ruff: Fix I001, E302, W291, E265 * Added docstring for bandit and checkmarx * Ruff: Fix W293 * Added Docstrings for Cargo_Audit, Brakeman, and Zap * Ruff: Fix W291 in Cargo_Audit/parser.py * Added docstrings for gitleaks, qualys, semgrep, snyk --------- Co-authored-by: Jino Tesauro <jinotesauro@pop-os.localdomain>
DefectDojo · Feb 27, 2025 · 8a24ee6 · 8a24ee6
1 parent 58e39f4
commit 8a24ee6
Show file tree

Hide file tree

Showing 11 changed files with 581 additions and 16 deletions.
diff --git a/dojo/tools/aqua/parser.py b/dojo/tools/aqua/parser.py
@@ -4,6 +4,94 @@
 
 
 class AquaParser:
+
+    def get_fields(self) -> list[str]:
+        """
+        List the finding fields populated by the Aqua Parser.
+
+        Fields:
+        - title: Built by combining cve, resource_name, and resource_version.
+        - severity: Aqua severity translated into the DefectDojo format.
+        - severity_justification: The justification returned by the Aqua Scanner.
+        - cvssv3: Defined based on the output of the Aqua Scanner.
+        - description: The description returned by the Aqua Scanner, or "no description" when none is present.
+        - mitigation: The fix_version returned by the Aqua Scanner.
+        - references: The url returned by the Aqua Scanner.
+        - component_name: The name returned by the Aqua Scanner.
+        - component_version: The version returned by the Aqua Scanner.
+        - impact: Same value as severity.
+        - epss_score: The epss_score returned by the scanner, when it exists.
+        - epss_percentile: The epss_percentile returned by the scanner, when it exists.
+        """
+        field_names = (
+            "title",
+            "severity",
+            "severity_justification",
+            "cvssv3",
+            "description",
+            "mitigation",
+            "references",
+            "component_name",
+            "component_version",
+            "impact",
+            "epss_score",
+            "epss_percentile",
+        )
+        return list(field_names)
+
+    def get_dedupe_fields(self) -> list[str]:
+        """
+        List the fields the Aqua Parser uses for deduplication.
+
+        Fields:
+        - severity: Aqua severity translated into the DefectDojo format.
+        - component_name: The name returned by the Aqua Scanner.
+        - component_version: The version returned by the Aqua Scanner.
+
+        NOTE: vulnerability_ids is not provided by the parser.
+        """
+        dedupe_names = (
+            "severity",
+            "component_name",
+            "component_version",
+        )
+        return list(dedupe_names)
+
+    # Jino: This get_fields was written for the Aqua Parser v2 (based off of "get_item_v2")
+    # What do we do with the separate versions of this parser?
+    # def get_fields(self) -> list[str]:
+    #     """
+    #     Return the list of fields used in the Aqua Parser V2
+    #
+    #     Fields:
+    #     - title: Created by combining the finding's cve and file_path
+    #     - description: Text describing finding
+    #     - url: Url associated with the finding
+    #     - severity: Severity rating converted from Aqua's integer format into DefectDojo's format.
+    #       #Jino: On line 106 it calls severity_of instead of aqua_severity_of. get_item v1 uses aqua_severity_of#
+    #     - impact: Impact rating of finding. Same as the finding severity.
+    #     - mitigation: If solution is true, mitigation equals true. If fix_version is true, mitigation equals 'Upgrade to True'. If neither is true, mitigation equals 'No known mitigation'.
+    #     """
+    #     return [
+    #         "title",
+    #         "description",
+    #         "url",
+    #         "severity",
+    #         "impact",
+    #         "mitigation",
+    #     ]
+    # Dedupe for v2 based on default dedupe values
+    # def get_dedupe_fields(self) -> list[str]:
+    #     """
+    #     Return the list of fields used for deduplication in the Aqua Parser V2.
+    #
+    #     Fields:
+    #     - title: Created by combining the finding's cve and file_path
+    #     - description: Text describing finding
+    #     """
+    #     #NOTE: vulnerability_ids is not provided by parser
+    #     return [
+    #         "title",
+    #         "description",
+    #     ]
     def get_scan_types(self):
         return ["Aqua Scan"]
 

diff --git a/dojo/tools/bandit/parser.py b/dojo/tools/bandit/parser.py
@@ -6,6 +6,49 @@
 
 
 class BanditParser:
+
+    def get_fields(self) -> list[str]:
+        """
+        List the finding fields populated by the Bandit Parser.
+
+        Fields:
+        - title: The issue_text output by the Bandit Scanner.
+        - description: Custom text assembled from test_name, test_id, filename, line_number, issue_confidence, and code segments.
+        - severity: The issue_severity from the Bandit Scanner.
+        - file_path: The filename from the Bandit Scanner.
+        - line: The line from the Bandit Scanner.
+        - date: The date from the Bandit Scanner.
+        - vuln_id_from_tool: Made by joining test_name and test_id.
+        - nb_occurences: Starts at 1 and is then updated.
+        - scanner_confidence: The confidence value, if the Bandit Scanner returns one.
+        """
+        field_names = (
+            "title",
+            "description",
+            "severity",
+            "file_path",
+            "line",
+            "date",
+            "vuln_id_from_tool",
+            "nb_occurences",
+            "scanner_confidence",
+        )
+        return list(field_names)
+
+    def get_dedupe_fields(self) -> list[str]:
+        """
+        List the fields the Bandit Parser uses for deduplication.
+
+        Fields:
+        - file_path: The filename from the Bandit Scanner.
+        - line: The line from the Bandit Scanner.
+        - vuln_id_from_tool: Made by joining test_name and test_id.
+        """
+        dedupe_names = (
+            "file_path",
+            "line",
+            "vuln_id_from_tool",
+        )
+        return list(dedupe_names)
+
     def get_scan_types(self):
         return ["Bandit Scan"]
 

diff --git a/dojo/tools/brakeman/parser.py b/dojo/tools/brakeman/parser.py
@@ -8,6 +8,46 @@
 
 
 class BrakemanParser:
+    def get_fields(self) -> list[str]:
+        """
+        List the finding fields populated by the Brakeman Parser.
+
+        Fields:
+        - title: warning_type and message from the Brakeman Scanner, joined together.
+        - description: Joined from the filename, line number, issue confidence, code, user input, and render path provided by the Brakeman Scanner.
+        - severity: Always Medium, regardless of context.
+        - file_path: The file from the Brakeman Scanner.
+        - line: The line from the Brakeman Scanner.
+        - date: The end_date from the Brakeman Scanner.
+        """
+        field_names = (
+            "title",
+            "description",
+            "severity",
+            "file_path",
+            "line",
+            "date",
+        )
+        return list(field_names)
+
+    def get_dedupe_fields(self) -> list[str]:
+        """
+        List the fields the Brakeman Parser uses for deduplication.
+
+        Fields:
+        - title: warning_type and message from the Brakeman Scanner, joined together.
+        - line: The line from the Brakeman Scanner.
+        - file_path: The file from the Brakeman Scanner.
+        - description: Joined from the filename, line number, issue confidence, code, user input, and render path provided by the Brakeman Scanner.
+
+        NOTE: uses legacy dedupe: ['title', 'cwe', 'line', 'file_path', 'description']
+        """
+        dedupe_names = (
+            "title",
+            "line",
+            "file_path",
+            "description",
+        )
+        return list(dedupe_names)
+
     def get_scan_types(self):
         return ["Brakeman Scan"]
 

diff --git a/dojo/tools/burp/parser.py b/dojo/tools/burp/parser.py
@@ -19,6 +19,54 @@ class BurpParser:
     TODO Test burp output version. Handle what happens if the parser doesn't support it.
     """
 
+    def get_fields(self) -> list[str]:
+        """
+        List the finding fields populated by the Burp Parser.
+
+        Fields:
+        - title: Made from the name in the Burp Scanner output.
+        - url: The URL output by the Burp Scanner.
+        - severity: The severity output by the Burp Scanner.
+        - param: Burp parameters combined to form param.
+        - scanner_confidence: Burp confidence (Certain, Firm, or Tentative) converted into the DefectDojo integer format.
+        - description: Made by combining URL, url_host, path, and detail.
+        - mitigation: Made from the remediation output by the Burp Scanner.
+        - impact: The background returned by the Burp Scanner.
+        - unique_id_from_tool: The serial_number returned by the Burp Scanner.
+        - vuln_id_from_tool: Taken from the output of the Burp Scanner.
+        - cwe: The cwe output by the Burp Scanner. Multiple CWEs are not supported by the parser.
+        """
+        field_names = (
+            "title",
+            "url",
+            "severity",
+            "param",
+            "scanner_confidence",
+            "description",
+            "mitigation",
+            "impact",
+            "unique_id_from_tool",
+            "vuln_id_from_tool",
+            "cwe",
+        )
+        return list(field_names)
+
+    def get_dedupe_fields(self) -> list[str]:
+        """
+        List the fields the Burp Parser uses for deduplication.
+
+        Fields:
+        - title: Made from the name in the Burp Scanner output.
+        - cwe: The cwe output by the Burp Scanner. Multiple CWEs are not supported by the parser.
+        - description: Made by combining URL, url_host, path, and detail.
+
+        NOTE: uses legacy dedupe: ['title', 'cwe', 'line', 'file_path', 'description']
+        """
+        dedupe_names = (
+            "title",
+            "cwe",
+            "description",
+        )
+        return list(dedupe_names)
+
     def get_scan_types(self):
         return ["Burp Scan"]
 

diff --git a/dojo/tools/cargo_audit/parser.py b/dojo/tools/cargo_audit/parser.py
@@ -8,6 +8,58 @@ class CargoAuditParser:
 
     """A class that can be used to parse the cargo audit JSON report file"""
 
+    def get_fields(self) -> list[str]:
+        """
+        List the finding fields populated by the Cargo Audit Parser.
+
+        Fields:
+        - title: The title from the Cargo Audit Scanner.
+        - severity: Always "High", regardless of context.
+        - tags: The tags from the Cargo Audit Scanner, if they are provided.
+        - description: The description from the Cargo Audit Scanner, joined with the URL provided.
+        - component_name: The package name provided by the Cargo Audit Scanner.
+        - component_version: The package version provided by the Cargo Audit Scanner.
+        - vuln_id_from_tool: The id provided by the Cargo Audit Scanner.
+        - publish_date: The date provided by the Cargo Audit Scanner.
+        - nb_occurences: Set to 1 by the parser.
+        - mitigation: The package_name and versions, if that information is available.
+
+        NOTE: This parser supports tags
+        """
+        field_names = (
+            "title",
+            "severity",
+            "tags",
+            "description",
+            "component_name",
+            "component_version",
+            "vuln_id_from_tool",
+            "publish_date",
+            "nb_occurences",
+            "mitigation",
+        )
+        return list(field_names)
+
+    def get_dedupe_fields(self) -> list[str]:
+        """
+        List the fields the Cargo Audit Parser uses for deduplication.
+
+        Fields:
+        - vulnerability_ids: Source not documented here. TODO: confirm how the parser populates it.
+        - severity: Always "High", regardless of context.
+        - component_name: The package name provided by the Cargo Audit Scanner.
+        - component_version: The package version provided by the Cargo Audit Scanner.
+        - vuln_id_from_tool: The id provided by the Cargo Audit Scanner.
+
+        NOTE: Dedupe fields in settings.dist.py list vulnerability_ids and vuln_id_from_tool
+        """
+        dedupe_names = (
+            "vulnerability_ids",
+            "severity",
+            "component_name",
+            "component_version",
+            "vuln_id_from_tool",
+        )
+        return list(dedupe_names)
+
     def get_scan_types(self):
         return ["CargoAudit Scan"]
 

diff --git a/dojo/tools/checkmarx/parser.py b/dojo/tools/checkmarx/parser.py
@@ -12,6 +12,69 @@
 
 
 class CheckmarxParser:
+
+    def get_fields(self) -> list[str]:
+        """
+        Return the list of fields used in the Checkmarx Parser.
+
+        Fields:
+        - title: Constructed from output of Checkmarx Scanner.
+        - cwe: Set to cwe outputted by Checkmarx Parser.
+        - active: Set to boolean value based on state returned by Checkmarx Parser.
+        - verified: Set to boolean value based on state returned by Checkmarx Parser.
+        - false_p: Set to boolean value based on "falsePositive" returned by Checkmarx Parser.
+        - description: Made from combining linenumber, column, source object, and number.
+        - severity: Set to severity outputted by Checkmarx Scanner.
+        - file_path: Set to filename outputted by Checkmarx Scanner.
+        - date: Set to date outputted by Checkmarx Scanner.
+        - nb_occurences: Initially set to 1 and then updated accordingly.
+        - line: Set to line outputted by Checkmarx Scanner.
+        - unique_id_from_tool: [If mode set to detailed] Set to the unique pathId outputted by Checkmarx Parser.
+        - sast_source_object: [If mode set to detailed] Set to sourceObject outputted by Checkmarx Parser.
+        - sast_sink_object: [If mode set to detailed] Set to sinkObject outputted by Checkmarx Parser.
+        - sast_source_line: [If mode set to detailed] Set to sourceLineNumber outputted by Checkmarx Parser.
+        - sast_source_file_path: [If mode set to detailed] Set to sourceFilename outputted by Checkmarx Parser.
+        - vuln_id_from_tool: Set to id from Checkmarx Scanner.
+        - component_name: Set to value within the "name" returned from the Checkmarx Scanner.
+        - component_version: Set to value within the "name" returned from the Checkmarx Scanner.
+        """
+        return [
+            "title",
+            "cwe",
+            "active",
+            "verified",
+            "false_p",
+            "description",
+            "severity",
+            "file_path",
+            "date",
+            "nb_occurences",
+            "line",
+            "unique_id_from_tool",
+            "sast_source_object",
+            "sast_sink_object",
+            "sast_source_line",
+            "sast_source_file_path",
+            "vuln_id_from_tool",
+            "component_name",
+            "component_version",
+        ]
+
+    def get_dedupe_fields(self) -> list[str]:
+        """
+        List the fields the Checkmarx Parser uses for deduplication.
+
+        Fields:
+        - cwe: The cwe outputted by Checkmarx Parser.
+        - severity: The severity outputted by Checkmarx Scanner.
+        - file_path: The filename outputted by Checkmarx Scanner.
+        """
+        dedupe_names = (
+            "cwe",
+            "severity",
+            "file_path",
+        )
+        return list(dedupe_names)
+
     def get_scan_types(self):
         return ["Checkmarx Scan", "Checkmarx Scan detailed"]